diff options
Diffstat (limited to 'ext/boost/regex')
55 files changed, 22927 insertions, 0 deletions
diff --git a/ext/boost/regex/concepts.hpp b/ext/boost/regex/concepts.hpp new file mode 100644 index 0000000000..98fd59413f --- /dev/null +++ b/ext/boost/regex/concepts.hpp @@ -0,0 +1,906 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE concepts.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression concepts. + */ + +#ifndef BOOST_REGEX_CONCEPTS_HPP_INCLUDED +#define BOOST_REGEX_CONCEPTS_HPP_INCLUDED + +#include <boost/concept_archetype.hpp> +#include <boost/concept_check.hpp> +#include <boost/type_traits/is_enum.hpp> +#include <boost/type_traits/is_base_and_derived.hpp> +#include <boost/static_assert.hpp> +#ifndef BOOST_TEST_TR1_REGEX +#include <boost/regex.hpp> +#endif +#include <bitset> +#include <vector> +#include <iostream> + +namespace boost{ + +// +// bitmask_archetype: +// this can be either an integer type, an enum, or a std::bitset, +// we use the latter as the architype as it offers the "strictest" +// of the possible interfaces: +// +typedef std::bitset<512> bitmask_archetype; +// +// char_architype: +// A strict model for the character type interface. 
+// +struct char_architype +{ + // default constructable: + char_architype(); + // copy constructable / assignable: + char_architype(const char_architype&); + char_architype& operator=(const char_architype&); + // constructable from an integral value: + char_architype(unsigned long val); + // comparable: + bool operator==(const char_architype&)const; + bool operator!=(const char_architype&)const; + bool operator<(const char_architype&)const; + bool operator<=(const char_architype&)const; + bool operator>=(const char_architype&)const; + bool operator>(const char_architype&)const; + // conversion to integral type: + operator long()const; +}; +// +// char_architype can not be used with basic_string: +// +} // namespace boost +namespace std{ + template<> struct char_traits<boost::char_architype> + { + // The intent is that this template is not instantiated, + // but this typedef gives us a chance of compilation in + // case it is: + typedef boost::char_architype char_type; + }; +} +namespace boost{ +// +// regex_traits_architype: +// A strict interpretation of the regular expression traits class requirements. 
+// +template <class charT> +struct regex_traits_architype +{ +public: + regex_traits_architype(); + typedef charT char_type; + // typedef std::size_t size_type; + typedef std::vector<char_type> string_type; + typedef copy_constructible_archetype<assignable_archetype<> > locale_type; + typedef bitmask_archetype char_class_type; + + static std::size_t length(const char_type* ) { return 0; } + + charT translate(charT ) const { return charT(); } + charT translate_nocase(charT ) const { return static_object<charT>::get(); } + + template <class ForwardIterator> + string_type transform(ForwardIterator , ForwardIterator ) const + { return static_object<string_type>::get(); } + template <class ForwardIterator> + string_type transform_primary(ForwardIterator , ForwardIterator ) const + { return static_object<string_type>::get(); } + + template <class ForwardIterator> + char_class_type lookup_classname(ForwardIterator , ForwardIterator ) const + { return static_object<char_class_type>::get(); } + template <class ForwardIterator> + string_type lookup_collatename(ForwardIterator , ForwardIterator ) const + { return static_object<string_type>::get(); } + + bool isctype(charT, char_class_type) const + { return false; } + int value(charT, int) const + { return 0; } + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return static_object<locale_type>::get(); } + +private: + // this type is not copyable: + regex_traits_architype(const regex_traits_architype&); + regex_traits_architype& operator=(const regex_traits_architype&); +}; + +// +// alter this to std::tr1, to test a std implementation: +// +#ifndef BOOST_TEST_TR1_REGEX +namespace global_regex_namespace = ::boost; +#else +namespace global_regex_namespace = ::std::tr1; +#endif + +template <class Bitmask> +struct BitmaskConcept +{ + void constraints() + { + function_requires<CopyConstructibleConcept<Bitmask> >(); + function_requires<AssignableConcept<Bitmask> >(); + + m_mask1 = m_mask2 | 
m_mask3; + m_mask1 = m_mask2 & m_mask3; + m_mask1 = m_mask2 ^ m_mask3; + + m_mask1 = ~m_mask2; + + m_mask1 |= m_mask2; + m_mask1 &= m_mask2; + m_mask1 ^= m_mask2; + } + Bitmask m_mask1, m_mask2, m_mask3; +}; + +template <class traits> +struct RegexTraitsConcept +{ + RegexTraitsConcept(); + // required typedefs: + typedef typename traits::char_type char_type; + // typedef typename traits::size_type size_type; + typedef typename traits::string_type string_type; + typedef typename traits::locale_type locale_type; + typedef typename traits::char_class_type char_class_type; + + void constraints() + { + //function_requires<UnsignedIntegerConcept<size_type> >(); + function_requires<RandomAccessContainerConcept<string_type> >(); + function_requires<DefaultConstructibleConcept<locale_type> >(); + function_requires<CopyConstructibleConcept<locale_type> >(); + function_requires<AssignableConcept<locale_type> >(); + function_requires<BitmaskConcept<char_class_type> >(); + + std::size_t n = traits::length(m_pointer); + ignore_unused_variable_warning(n); + + char_type c = m_ctraits.translate(m_char); + ignore_unused_variable_warning(c); + c = m_ctraits.translate_nocase(m_char); + + //string_type::foobar bar; + string_type s1 = m_ctraits.transform(m_pointer, m_pointer); + ignore_unused_variable_warning(s1); + + string_type s2 = m_ctraits.transform_primary(m_pointer, m_pointer); + ignore_unused_variable_warning(s2); + + char_class_type cc = m_ctraits.lookup_classname(m_pointer, m_pointer); + ignore_unused_variable_warning(cc); + + string_type s3 = m_ctraits.lookup_collatename(m_pointer, m_pointer); + ignore_unused_variable_warning(s3); + + bool b = m_ctraits.isctype(m_char, cc); + ignore_unused_variable_warning(b); + + int v = m_ctraits.value(m_char, 16); + ignore_unused_variable_warning(v); + + locale_type l(m_ctraits.getloc()); + m_traits.imbue(l); + ignore_unused_variable_warning(l); + } + traits m_traits; + const traits m_ctraits; + const char_type* m_pointer; + char_type 
m_char; +private: + RegexTraitsConcept& operator=(RegexTraitsConcept&); +}; + +// +// helper class to compute what traits class a regular expression type is using: +// +template <class Regex> +struct regex_traits_computer; + +template <class charT, class traits> +struct regex_traits_computer< global_regex_namespace::basic_regex<charT, traits> > +{ + typedef traits type; +}; + +// +// BaseRegexConcept does not test anything dependent on basic_string, +// in case our charT does not have an associated char_traits: +// +template <class Regex> +struct BaseRegexConcept +{ + typedef typename Regex::value_type value_type; + //typedef typename Regex::size_type size_type; + typedef typename Regex::flag_type flag_type; + typedef typename Regex::locale_type locale_type; + typedef input_iterator_archetype<value_type> input_iterator_type; + + // derived test types: + typedef const value_type* pointer_type; + typedef bidirectional_iterator_archetype<value_type> BidiIterator; + typedef global_regex_namespace::sub_match<BidiIterator> sub_match_type; + typedef global_regex_namespace::match_results<BidiIterator> match_results_type; + typedef output_iterator_archetype<value_type> OutIterator; + typedef typename regex_traits_computer<Regex>::type traits_type; + typedef global_regex_namespace::regex_iterator<BidiIterator, value_type, traits_type> regex_iterator_type; + typedef global_regex_namespace::regex_token_iterator<BidiIterator, value_type, traits_type> regex_token_iterator_type; + + void global_constraints() + { + // + // test non-template components: + // + function_requires<BitmaskConcept<global_regex_namespace::regex_constants::syntax_option_type> >(); + global_regex_namespace::regex_constants::syntax_option_type opts + = global_regex_namespace::regex_constants::icase + | global_regex_namespace::regex_constants::nosubs + | global_regex_namespace::regex_constants::optimize + | global_regex_namespace::regex_constants::collate + | 
global_regex_namespace::regex_constants::ECMAScript + | global_regex_namespace::regex_constants::basic + | global_regex_namespace::regex_constants::extended + | global_regex_namespace::regex_constants::awk + | global_regex_namespace::regex_constants::grep + | global_regex_namespace::regex_constants::egrep; + ignore_unused_variable_warning(opts); + + function_requires<BitmaskConcept<global_regex_namespace::regex_constants::match_flag_type> >(); + global_regex_namespace::regex_constants::match_flag_type mopts + = global_regex_namespace::regex_constants::match_default + | global_regex_namespace::regex_constants::match_not_bol + | global_regex_namespace::regex_constants::match_not_eol + | global_regex_namespace::regex_constants::match_not_bow + | global_regex_namespace::regex_constants::match_not_eow + | global_regex_namespace::regex_constants::match_any + | global_regex_namespace::regex_constants::match_not_null + | global_regex_namespace::regex_constants::match_continuous + | global_regex_namespace::regex_constants::match_prev_avail + | global_regex_namespace::regex_constants::format_default + | global_regex_namespace::regex_constants::format_sed + | global_regex_namespace::regex_constants::format_no_copy + | global_regex_namespace::regex_constants::format_first_only; + ignore_unused_variable_warning(mopts); + + BOOST_STATIC_ASSERT((::boost::is_enum<global_regex_namespace::regex_constants::error_type>::value)); + global_regex_namespace::regex_constants::error_type e1 = global_regex_namespace::regex_constants::error_collate; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_ctype; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_escape; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_backref; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_brack; + ignore_unused_variable_warning(e1); + e1 = 
global_regex_namespace::regex_constants::error_paren; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_brace; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_badbrace; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_range; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_space; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_badrepeat; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_complexity; + ignore_unused_variable_warning(e1); + e1 = global_regex_namespace::regex_constants::error_stack; + ignore_unused_variable_warning(e1); + + BOOST_STATIC_ASSERT((::boost::is_base_and_derived<std::runtime_error, global_regex_namespace::regex_error>::value )); + const global_regex_namespace::regex_error except(e1); + e1 = except.code(); + + typedef typename Regex::value_type value_type; + function_requires< RegexTraitsConcept<global_regex_namespace::regex_traits<char> > >(); + function_requires< BaseRegexConcept<global_regex_namespace::basic_regex<char> > >(); + } + void constraints() + { + global_constraints(); + + BOOST_STATIC_ASSERT((::boost::is_same< flag_type, global_regex_namespace::regex_constants::syntax_option_type>::value)); + flag_type opts + = Regex::icase + | Regex::nosubs + | Regex::optimize + | Regex::collate + | Regex::ECMAScript + | Regex::basic + | Regex::extended + | Regex::awk + | Regex::grep + | Regex::egrep; + ignore_unused_variable_warning(opts); + + function_requires<DefaultConstructibleConcept<Regex> >(); + function_requires<CopyConstructibleConcept<Regex> >(); + + // Regex constructors: + Regex e1(m_pointer); + ignore_unused_variable_warning(e1); + Regex e2(m_pointer, m_flags); + ignore_unused_variable_warning(e2); + Regex e3(m_pointer, m_size, m_flags); + ignore_unused_variable_warning(e3); + 
Regex e4(in1, in2); + ignore_unused_variable_warning(e4); + Regex e5(in1, in2, m_flags); + ignore_unused_variable_warning(e5); + + // assign etc: + Regex e; + e = m_pointer; + e = e1; + e.assign(e1); + e.assign(m_pointer); + e.assign(m_pointer, m_flags); + e.assign(m_pointer, m_size, m_flags); + e.assign(in1, in2); + e.assign(in1, in2, m_flags); + + // access: + const Regex ce; + unsigned i = ce.mark_count(); + ignore_unused_variable_warning(i); + m_flags = ce.flags(); + e.imbue(ce.getloc()); + e.swap(e1); + + global_regex_namespace::swap(e, e1); + + // sub_match: + BOOST_STATIC_ASSERT((::boost::is_base_and_derived<std::pair<BidiIterator, BidiIterator>, sub_match_type>::value)); + typedef typename sub_match_type::value_type sub_value_type; + typedef typename sub_match_type::difference_type sub_diff_type; + typedef typename sub_match_type::iterator sub_iter_type; + BOOST_STATIC_ASSERT((::boost::is_same<sub_value_type, value_type>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<sub_iter_type, BidiIterator>::value)); + bool b = m_sub.matched; + ignore_unused_variable_warning(b); + BidiIterator bi = m_sub.first; + ignore_unused_variable_warning(bi); + bi = m_sub.second; + ignore_unused_variable_warning(bi); + sub_diff_type diff = m_sub.length(); + ignore_unused_variable_warning(diff); + // match_results tests: + typedef typename match_results_type::value_type mr_value_type; + typedef typename match_results_type::const_reference mr_const_reference; + typedef typename match_results_type::reference mr_reference; + typedef typename match_results_type::const_iterator mr_const_iterator; + typedef typename match_results_type::iterator mr_iterator; + typedef typename match_results_type::difference_type mr_difference_type; + typedef typename match_results_type::size_type mr_size_type; + typedef typename match_results_type::allocator_type mr_allocator_type; + typedef typename match_results_type::char_type mr_char_type; + typedef typename match_results_type::string_type 
mr_string_type; + + match_results_type m1; + mr_allocator_type at; + match_results_type m2(at); + match_results_type m3(m1); + m1 = m2; + + int ival = 0; + + mr_size_type mrs = m_cresults.size(); + ignore_unused_variable_warning(mrs); + mrs = m_cresults.max_size(); + ignore_unused_variable_warning(mrs); + b = m_cresults.empty(); + ignore_unused_variable_warning(b); + mr_difference_type mrd = m_cresults.length(); + ignore_unused_variable_warning(mrd); + mrd = m_cresults.length(ival); + ignore_unused_variable_warning(mrd); + mrd = m_cresults.position(); + ignore_unused_variable_warning(mrd); + mrd = m_cresults.position(mrs); + ignore_unused_variable_warning(mrd); + + mr_const_reference mrcr = m_cresults[ival]; + ignore_unused_variable_warning(mrcr); + mr_const_reference mrcr2 = m_cresults.prefix(); + ignore_unused_variable_warning(mrcr2); + mr_const_reference mrcr3 = m_cresults.suffix(); + ignore_unused_variable_warning(mrcr3); + mr_const_iterator mrci = m_cresults.begin(); + ignore_unused_variable_warning(mrci); + mrci = m_cresults.end(); + ignore_unused_variable_warning(mrci); + + mr_allocator_type at2 = m_cresults.get_allocator(); + m_results.swap(m_results); + global_regex_namespace::swap(m_results, m_results); + + // regex_match: + b = global_regex_namespace::regex_match(m_in, m_in, m_results, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_in, m_in, m_results, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_in, m_in, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_in, m_in, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, m_pmatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, m_pmatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_pointer, e); + ignore_unused_variable_warning(b); + b = 
global_regex_namespace::regex_match(m_pointer, e, m_mft); + ignore_unused_variable_warning(b); + // regex_search: + b = global_regex_namespace::regex_search(m_in, m_in, m_results, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_in, m_in, m_results, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_in, m_in, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_in, m_in, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, m_pmatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, m_pmatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_pointer, e, m_mft); + ignore_unused_variable_warning(b); + + // regex_iterator: + typedef typename regex_iterator_type::regex_type rit_regex_type; + typedef typename regex_iterator_type::value_type rit_value_type; + typedef typename regex_iterator_type::difference_type rit_difference_type; + typedef typename regex_iterator_type::pointer rit_pointer; + typedef typename regex_iterator_type::reference rit_reference; + typedef typename regex_iterator_type::iterator_category rit_iterator_category; + BOOST_STATIC_ASSERT((::boost::is_same<rit_regex_type, Regex>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rit_value_type, match_results_type>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rit_difference_type, std::ptrdiff_t>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rit_pointer, const match_results_type*>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rit_reference, const match_results_type&>::value)); + BOOST_STATIC_ASSERT((::boost::is_convertible<rit_iterator_category*, std::forward_iterator_tag*>::value)); + // this takes care of most of the checks needed: + 
function_requires<ForwardIteratorConcept<regex_iterator_type> >(); + regex_iterator_type iter1(m_in, m_in, e); + ignore_unused_variable_warning(iter1); + regex_iterator_type iter2(m_in, m_in, e, m_mft); + ignore_unused_variable_warning(iter2); + + // regex_token_iterator: + typedef typename regex_token_iterator_type::regex_type rtit_regex_type; + typedef typename regex_token_iterator_type::value_type rtit_value_type; + typedef typename regex_token_iterator_type::difference_type rtit_difference_type; + typedef typename regex_token_iterator_type::pointer rtit_pointer; + typedef typename regex_token_iterator_type::reference rtit_reference; + typedef typename regex_token_iterator_type::iterator_category rtit_iterator_category; + BOOST_STATIC_ASSERT((::boost::is_same<rtit_regex_type, Regex>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rtit_value_type, sub_match_type>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rtit_difference_type, std::ptrdiff_t>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rtit_pointer, const sub_match_type*>::value)); + BOOST_STATIC_ASSERT((::boost::is_same<rtit_reference, const sub_match_type&>::value)); + BOOST_STATIC_ASSERT((::boost::is_convertible<rtit_iterator_category*, std::forward_iterator_tag*>::value)); + // this takes care of most of the checks needed: + function_requires<ForwardIteratorConcept<regex_token_iterator_type> >(); + regex_token_iterator_type ti1(m_in, m_in, e); + ignore_unused_variable_warning(ti1); + regex_token_iterator_type ti2(m_in, m_in, e, 0); + ignore_unused_variable_warning(ti2); + regex_token_iterator_type ti3(m_in, m_in, e, 0, m_mft); + ignore_unused_variable_warning(ti3); + std::vector<int> subs; + regex_token_iterator_type ti4(m_in, m_in, e, subs); + ignore_unused_variable_warning(ti4); + regex_token_iterator_type ti5(m_in, m_in, e, subs, m_mft); + ignore_unused_variable_warning(ti5); + static const int i_array[3] = { 1, 2, 3, }; + regex_token_iterator_type ti6(m_in, m_in, e, i_array); + 
ignore_unused_variable_warning(ti6); + regex_token_iterator_type ti7(m_in, m_in, e, i_array, m_mft); + ignore_unused_variable_warning(ti7); + } + + pointer_type m_pointer; + flag_type m_flags; + std::size_t m_size; + input_iterator_type in1, in2; + const sub_match_type m_sub; + const value_type m_char; + match_results_type m_results; + const match_results_type m_cresults; + OutIterator m_out; + BidiIterator m_in; + global_regex_namespace::regex_constants::match_flag_type m_mft; + global_regex_namespace::match_results<pointer_type> m_pmatch; + + BaseRegexConcept(); + BaseRegexConcept(const BaseRegexConcept&); + BaseRegexConcept& operator=(const BaseRegexConcept&); +}; + +// +// RegexConcept: +// Test every interface in the std: +// +template <class Regex> +struct RegexConcept +{ + typedef typename Regex::value_type value_type; + //typedef typename Regex::size_type size_type; + typedef typename Regex::flag_type flag_type; + typedef typename Regex::locale_type locale_type; + + // derived test types: + typedef const value_type* pointer_type; + typedef std::basic_string<value_type> string_type; + typedef boost::bidirectional_iterator_archetype<value_type> BidiIterator; + typedef global_regex_namespace::sub_match<BidiIterator> sub_match_type; + typedef global_regex_namespace::match_results<BidiIterator> match_results_type; + typedef output_iterator_archetype<value_type> OutIterator; + + + void constraints() + { + function_requires<BaseRegexConcept<Regex> >(); + // string based construct: + Regex e1(m_string); + ignore_unused_variable_warning(e1); + Regex e2(m_string, m_flags); + ignore_unused_variable_warning(e2); + + // assign etc: + Regex e; + e = m_string; + e.assign(m_string); + e.assign(m_string, m_flags); + + // sub_match: + string_type s(m_sub); + ignore_unused_variable_warning(s); + s = m_sub.str(); + ignore_unused_variable_warning(s); + int i = m_sub.compare(m_string); + ignore_unused_variable_warning(i); + + int i2 = m_sub.compare(m_sub); + 
ignore_unused_variable_warning(i2); + i2 = m_sub.compare(m_pointer); + ignore_unused_variable_warning(i2); + + bool b = m_sub == m_sub; + ignore_unused_variable_warning(b); + b = m_sub != m_sub; + ignore_unused_variable_warning(b); + b = m_sub <= m_sub; + ignore_unused_variable_warning(b); + b = m_sub <= m_sub; + ignore_unused_variable_warning(b); + b = m_sub > m_sub; + ignore_unused_variable_warning(b); + b = m_sub >= m_sub; + ignore_unused_variable_warning(b); + + b = m_sub == m_pointer; + ignore_unused_variable_warning(b); + b = m_sub != m_pointer; + ignore_unused_variable_warning(b); + b = m_sub <= m_pointer; + ignore_unused_variable_warning(b); + b = m_sub <= m_pointer; + ignore_unused_variable_warning(b); + b = m_sub > m_pointer; + ignore_unused_variable_warning(b); + b = m_sub >= m_pointer; + ignore_unused_variable_warning(b); + + b = m_pointer == m_sub; + ignore_unused_variable_warning(b); + b = m_pointer != m_sub; + ignore_unused_variable_warning(b); + b = m_pointer <= m_sub; + ignore_unused_variable_warning(b); + b = m_pointer <= m_sub; + ignore_unused_variable_warning(b); + b = m_pointer > m_sub; + ignore_unused_variable_warning(b); + b = m_pointer >= m_sub; + ignore_unused_variable_warning(b); + + b = m_sub == m_char; + ignore_unused_variable_warning(b); + b = m_sub != m_char; + ignore_unused_variable_warning(b); + b = m_sub <= m_char; + ignore_unused_variable_warning(b); + b = m_sub <= m_char; + ignore_unused_variable_warning(b); + b = m_sub > m_char; + ignore_unused_variable_warning(b); + b = m_sub >= m_char; + ignore_unused_variable_warning(b); + + b = m_char == m_sub; + ignore_unused_variable_warning(b); + b = m_char != m_sub; + ignore_unused_variable_warning(b); + b = m_char <= m_sub; + ignore_unused_variable_warning(b); + b = m_char <= m_sub; + ignore_unused_variable_warning(b); + b = m_char > m_sub; + ignore_unused_variable_warning(b); + b = m_char >= m_sub; + ignore_unused_variable_warning(b); + + b = m_sub == m_string; + 
ignore_unused_variable_warning(b); + b = m_sub != m_string; + ignore_unused_variable_warning(b); + b = m_sub <= m_string; + ignore_unused_variable_warning(b); + b = m_sub <= m_string; + ignore_unused_variable_warning(b); + b = m_sub > m_string; + ignore_unused_variable_warning(b); + b = m_sub >= m_string; + ignore_unused_variable_warning(b); + + b = m_string == m_sub; + ignore_unused_variable_warning(b); + b = m_string != m_sub; + ignore_unused_variable_warning(b); + b = m_string <= m_sub; + ignore_unused_variable_warning(b); + b = m_string <= m_sub; + ignore_unused_variable_warning(b); + b = m_string > m_sub; + ignore_unused_variable_warning(b); + b = m_string >= m_sub; + ignore_unused_variable_warning(b); + + // match results: + m_string = m_results.str(); + ignore_unused_variable_warning(m_string); + m_string = m_results.str(0); + ignore_unused_variable_warning(m_string); + m_out = m_cresults.format(m_out, m_string); + m_out = m_cresults.format(m_out, m_string, m_mft); + m_string = m_cresults.format(m_string); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.format(m_string, m_mft); + ignore_unused_variable_warning(m_string); + + // regex_match: + b = global_regex_namespace::regex_match(m_string, m_smatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_string, m_smatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_string, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_match(m_string, e, m_mft); + ignore_unused_variable_warning(b); + + // regex_search: + b = global_regex_namespace::regex_search(m_string, m_smatch, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_string, m_smatch, e, m_mft); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_string, e); + ignore_unused_variable_warning(b); + b = global_regex_namespace::regex_search(m_string, e, m_mft); + 
ignore_unused_variable_warning(b); + + // regex_replace: + m_out = global_regex_namespace::regex_replace(m_out, m_in, m_in, e, m_string, m_mft); + m_out = global_regex_namespace::regex_replace(m_out, m_in, m_in, e, m_string); + m_string = global_regex_namespace::regex_replace(m_string, e, m_string, m_mft); + ignore_unused_variable_warning(m_string); + m_string = global_regex_namespace::regex_replace(m_string, e, m_string); + ignore_unused_variable_warning(m_string); + + } + + flag_type m_flags; + string_type m_string; + const sub_match_type m_sub; + match_results_type m_results; + pointer_type m_pointer; + value_type m_char; + const match_results_type m_cresults; + OutIterator m_out; + BidiIterator m_in; + global_regex_namespace::regex_constants::match_flag_type m_mft; + global_regex_namespace::match_results<typename string_type::const_iterator> m_smatch; + + RegexConcept(); + RegexConcept(const RegexConcept&); + RegexConcept& operator=(const RegexConcept&); +}; + +#ifndef BOOST_REGEX_TEST_STD +// +// BoostRegexConcept: +// Test every interface in the Boost implementation: +// +template <class Regex> +struct BoostRegexConcept +{ + typedef typename Regex::value_type value_type; + typedef typename Regex::size_type size_type; + typedef typename Regex::flag_type flag_type; + typedef typename Regex::locale_type locale_type; + + // derived test types: + typedef const value_type* pointer_type; + typedef std::basic_string<value_type> string_type; + typedef typename Regex::const_iterator const_iterator; + typedef bidirectional_iterator_archetype<value_type> BidiIterator; + typedef global_regex_namespace::sub_match<BidiIterator> sub_match_type; + typedef global_regex_namespace::match_results<BidiIterator> match_results_type; + + void constraints() + { + global_regex_namespace::regex_constants::match_flag_type mopts + = global_regex_namespace::regex_constants::match_default + | global_regex_namespace::regex_constants::match_not_bol + | 
global_regex_namespace::regex_constants::match_not_eol + | global_regex_namespace::regex_constants::match_not_bow + | global_regex_namespace::regex_constants::match_not_eow + | global_regex_namespace::regex_constants::match_any + | global_regex_namespace::regex_constants::match_not_null + | global_regex_namespace::regex_constants::match_continuous + | global_regex_namespace::regex_constants::match_partial + | global_regex_namespace::regex_constants::match_prev_avail + | global_regex_namespace::regex_constants::format_default + | global_regex_namespace::regex_constants::format_sed + | global_regex_namespace::regex_constants::format_perl + | global_regex_namespace::regex_constants::format_no_copy + | global_regex_namespace::regex_constants::format_first_only; + + (void)mopts; + + function_requires<RegexConcept<Regex> >(); + const global_regex_namespace::regex_error except(global_regex_namespace::regex_constants::error_collate); + std::ptrdiff_t pt = except.position(); + ignore_unused_variable_warning(pt); + const Regex ce, ce2; +#ifndef BOOST_NO_STD_LOCALE + m_stream << ce; +#endif + unsigned i = ce.error_code(); + ignore_unused_variable_warning(i); + pointer_type p = ce.expression(); + ignore_unused_variable_warning(p); + int i2 = ce.compare(ce2); + ignore_unused_variable_warning(i2); + bool b = ce == ce2; + ignore_unused_variable_warning(b); + b = ce.empty(); + ignore_unused_variable_warning(b); + b = ce != ce2; + ignore_unused_variable_warning(b); + b = ce < ce2; + ignore_unused_variable_warning(b); + b = ce > ce2; + ignore_unused_variable_warning(b); + b = ce <= ce2; + ignore_unused_variable_warning(b); + b = ce >= ce2; + ignore_unused_variable_warning(b); + i = ce.status(); + ignore_unused_variable_warning(i); + size_type s = ce.max_size(); + ignore_unused_variable_warning(s); + s = ce.size(); + ignore_unused_variable_warning(s); + const_iterator pi = ce.begin(); + ignore_unused_variable_warning(pi); + pi = ce.end(); + ignore_unused_variable_warning(pi); + 
string_type s2 = ce.str(); + ignore_unused_variable_warning(s2); + + m_string = m_sub + m_sub; + ignore_unused_variable_warning(m_string); + m_string = m_sub + m_pointer; + ignore_unused_variable_warning(m_string); + m_string = m_pointer + m_sub; + ignore_unused_variable_warning(m_string); + m_string = m_sub + m_string; + ignore_unused_variable_warning(m_string); + m_string = m_string + m_sub; + ignore_unused_variable_warning(m_string); + m_string = m_sub + m_char; + ignore_unused_variable_warning(m_string); + m_string = m_char + m_sub; + ignore_unused_variable_warning(m_string); + + // Named sub-expressions: + m_sub = m_cresults[&m_char]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[m_string]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[""]; + ignore_unused_variable_warning(m_sub); + m_sub = m_cresults[std::string("")]; + ignore_unused_variable_warning(m_sub); + m_string = m_cresults.str(&m_char); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(m_string); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(""); + ignore_unused_variable_warning(m_string); + m_string = m_cresults.str(std::string("")); + ignore_unused_variable_warning(m_string); + + typename match_results_type::difference_type diff; + diff = m_cresults.length(&m_char); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(m_string); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(""); + ignore_unused_variable_warning(diff); + diff = m_cresults.length(std::string("")); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(&m_char); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(m_string); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(""); + ignore_unused_variable_warning(diff); + diff = m_cresults.position(std::string("")); + ignore_unused_variable_warning(diff); + +#ifndef BOOST_NO_STD_LOCALE + m_stream << m_sub; + m_stream << 
m_cresults; +#endif + } + + std::basic_ostream<value_type> m_stream; + sub_match_type m_sub; + pointer_type m_pointer; + string_type m_string; + const value_type m_char; + match_results_type m_results; + const match_results_type m_cresults; + + BoostRegexConcept(); + BoostRegexConcept(const BoostRegexConcept&); + BoostRegexConcept& operator=(const BoostRegexConcept&); +}; + +#endif // BOOST_REGEX_TEST_STD + +} + +#endif diff --git a/ext/boost/regex/config.hpp b/ext/boost/regex/config.hpp new file mode 100644 index 0000000000..8306f3ff9a --- /dev/null +++ b/ext/boost/regex/config.hpp @@ -0,0 +1,417 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE config.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: regex extended config setup. + */ + +#ifndef BOOST_REGEX_CONFIG_HPP +#define BOOST_REGEX_CONFIG_HPP +/* + * Borland C++ Fix/error check + * this has to go *before* we include any std lib headers: + */ +#if defined(__BORLANDC__) +# include <boost/regex/config/borland.hpp> +#endif + +/***************************************************************************** + * + * Include all the headers we need here: + * + ****************************************************************************/ + +#ifdef __cplusplus + +# ifndef BOOST_REGEX_USER_CONFIG +# define BOOST_REGEX_USER_CONFIG <boost/regex/user.hpp> +# endif + +# include BOOST_REGEX_USER_CONFIG + +# include <boost/config.hpp> + +#else + /* + * C build, + * don't include <boost/config.hpp> because that may + * do C++ specific things in future... 
+ */ +# include <stdlib.h> +# include <stddef.h> +# ifdef _MSC_VER +# define BOOST_MSVC _MSC_VER +# endif +#endif + +/***************************************************************************** + * + * Boilerplate regex config options: + * + ****************************************************************************/ + +/* Obsolete macro, use BOOST_VERSION instead: */ +#define BOOST_RE_VERSION 320 + +/* fix: */ +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +/* + * Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow + * masks to be combined, for example: + * std::use_facet<std::ctype<wchar_t> >.is(std::ctype_base::lower|std::ctype_base::upper, L'a'); + * returns *false*. + */ +#ifdef __GLIBCPP__ +# define BOOST_REGEX_BUGGY_CTYPE_FACET +#endif + +/* + * Intel C++ before 8.0 ends up with unresolved externals unless we turn off + * extern template support: + */ +#if defined(BOOST_INTEL) && defined(__cplusplus) && (BOOST_INTEL <= 800) +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +#endif +/* + * Visual C++ doesn't support external templates with C++ extensions turned off: + */ +#if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +#endif + +/* + * If there isn't good enough wide character support then there will + * be no wide character regular expressions: + */ +#if (defined(BOOST_NO_CWCHAR) || defined(BOOST_NO_CWCTYPE) || defined(BOOST_NO_STD_WSTRING)) +# if !defined(BOOST_NO_WREGEX) +# define BOOST_NO_WREGEX +# endif +#else +# if defined(__sgi) && (defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) + /* STLPort on IRIX is misconfigured: <cwctype> does not compile + * as a temporary fix include <wctype.h> instead and prevent inclusion + * of STLPort version of <cwctype> */ +# include <wctype.h> +# define __STLPORT_CWCTYPE +# define _STLP_CWCTYPE +# endif + +#ifdef __cplusplus +# include <boost/regex/config/cwchar.hpp> +#endif + +#endif + +/* + * If Win32 support has been disabled for boost 
in general, then + * it is for regex in particular: + */ +#if defined(BOOST_DISABLE_WIN32) && !defined(BOOST_REGEX_NO_W32) +# define BOOST_REGEX_NO_W32 +#endif + +/* disable our own file-iterators and mapfiles if we can't + * support them: */ +#if !defined(BOOST_HAS_DIRENT_H) && !(defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)) +# define BOOST_REGEX_NO_FILEITER +#endif + +/* backwards compatibility: */ +#if defined(BOOST_RE_NO_LIB) +# define BOOST_REGEX_NO_LIB +#endif + +#if defined(__GNUC__) && (defined(_WIN32) || defined(__CYGWIN__)) +/* gcc on win32 has problems if you include <windows.h> + (sporadically generates bad code). */ +# define BOOST_REGEX_NO_W32 +#endif +#if defined(__COMO__) && !defined(BOOST_REGEX_NO_W32) && !defined(_MSC_EXTENSIONS) +# define BOOST_REGEX_NO_W32 +#endif + +/***************************************************************************** + * + * Wide character workarounds: + * + ****************************************************************************/ + +/* + * define BOOST_REGEX_HAS_OTHER_WCHAR_T when wchar_t is a native type, but the user's + * code may be built with wchar_t as unsigned short: basically when we're building + * with MSVC and the /Zc:wchar_t option we place some extra unsigned short versions + * of the non-inline functions in the library, so that users can still link to the lib, + * irrespective of whether their own code is built with /Zc:wchar_t. 
+ */ +#if defined(__cplusplus) && (defined(BOOST_MSVC) || defined(__ICL)) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) && defined(BOOST_WINDOWS) && !defined(__SGI_STL_PORT) && !defined(_STLPORT_VERSION) && !defined(BOOST_RWSTD_VER) +# define BOOST_REGEX_HAS_OTHER_WCHAR_T +# ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable : 4251 4231 4660) +# endif +# if defined(_DLL) && defined(BOOST_MSVC) && (BOOST_MSVC < 1600) +# include <string> + extern template class __declspec(dllimport) std::basic_string<unsigned short>; +# endif +# ifdef BOOST_MSVC +# pragma warning(pop) +# endif +#endif + + +/***************************************************************************** + * + * Set up dll import/export options: + * + ****************************************************************************/ + +#if defined(BOOST_HAS_DECLSPEC) && (defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)) && !defined(BOOST_REGEX_STATIC_LINK) +# if defined(BOOST_REGEX_SOURCE) +# define BOOST_REGEX_DECL __declspec(dllexport) +# define BOOST_REGEX_BUILD_DLL +# else +# define BOOST_REGEX_DECL __declspec(dllimport) +# endif +#endif + +#ifndef BOOST_REGEX_DECL +# define BOOST_REGEX_DECL +#endif + +#if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus) +# define BOOST_LIB_NAME boost_regex +# if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# define BOOST_DYN_LINK +# endif +# ifdef BOOST_REGEX_DIAG +# define BOOST_LIB_DIAGNOSTIC +# endif +# include <boost/config/auto_link.hpp> +#endif + +/***************************************************************************** + * + * Set up function call type: + * + ****************************************************************************/ + +#if defined(BOOST_MSVC) && (BOOST_MSVC >= 1200) && defined(_MSC_EXTENSIONS) +#if defined(_DEBUG) || defined(__MSVC_RUNTIME_CHECKS) || defined(_MANAGED) +# define BOOST_REGEX_CALL __cdecl +#else +# define BOOST_REGEX_CALL 
__fastcall +#endif +# define BOOST_REGEX_CCALL __cdecl +#endif + +#if defined(__BORLANDC__) && !defined(BOOST_DISABLE_WIN32) +# define BOOST_REGEX_CALL __fastcall +# define BOOST_REGEX_CCALL __stdcall +#endif + +#ifndef BOOST_REGEX_CALL +# define BOOST_REGEX_CALL +#endif +#ifndef BOOST_REGEX_CCALL +#define BOOST_REGEX_CCALL +#endif + +/***************************************************************************** + * + * Set up localisation model: + * + ****************************************************************************/ + +/* backwards compatibility: */ +#ifdef BOOST_RE_LOCALE_C +# define BOOST_REGEX_USE_C_LOCALE +#endif + +#ifdef BOOST_RE_LOCALE_CPP +# define BOOST_REGEX_USE_CPP_LOCALE +#endif + +/* Win32 defaults to native Win32 locale: */ +#if defined(_WIN32) && !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) && !defined(BOOST_REGEX_NO_W32) +# define BOOST_REGEX_USE_WIN32_LOCALE +#endif +/* otherwise use C++ locale if supported: */ +#if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) && !defined(BOOST_NO_STD_LOCALE) +# define BOOST_REGEX_USE_CPP_LOCALE +#endif +/* otherwise fall back to the C locale: */ +#if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) +# define BOOST_REGEX_USE_C_LOCALE +#endif + +#ifndef BOOST_REGEX_MAX_STATE_COUNT +# define BOOST_REGEX_MAX_STATE_COUNT 100000000 +#endif + + +/***************************************************************************** + * + * Error Handling for exception free compilers: + * + ****************************************************************************/ + +#ifdef BOOST_NO_EXCEPTIONS +/* + * If there are no exceptions then we must report critical-errors + * the only way we know how; by terminating. 
+ */ +#include <stdexcept> +#include <string> +#include <boost/throw_exception.hpp> + +# define BOOST_REGEX_NOEH_ASSERT(x)\ +if(0 == (x))\ +{\ + std::string s("Error: critical regex++ failure in: ");\ + s.append(#x);\ + std::runtime_error e(s);\ + boost::throw_exception(e);\ +} +#else +/* + * With exceptions then error handling is taken care of and + * there is no need for these checks: + */ +# define BOOST_REGEX_NOEH_ASSERT(x) +#endif + + +/***************************************************************************** + * + * Stack protection under MS Windows: + * + ****************************************************************************/ + +#if !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_V3) +# if(defined(_WIN32) || defined(_WIN64) || defined(_WINCE)) \ + && !defined(__GNUC__) \ + && !(defined(__BORLANDC__) && (__BORLANDC__ >= 0x600)) \ + && !(defined(__MWERKS__) && (__MWERKS__ <= 0x3003)) +# define BOOST_REGEX_HAS_MS_STACK_GUARD +# endif +#elif defined(BOOST_REGEX_HAS_MS_STACK_GUARD) +# undef BOOST_REGEX_HAS_MS_STACK_GUARD +#endif + +#if defined(__cplusplus) && defined(BOOST_REGEX_HAS_MS_STACK_GUARD) + +namespace boost{ +namespace re_detail{ + +BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page(); + +} +} + +#endif + + +/***************************************************************************** + * + * Algorithm selection and configuration: + * + ****************************************************************************/ + +#if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE) +# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(BOOST_MSVC) && (BOOST_MSVC >= 1400)) +# define BOOST_REGEX_RECURSIVE +# else +# define BOOST_REGEX_NON_RECURSIVE +# endif +#endif + +#ifdef BOOST_REGEX_NON_RECURSIVE +# ifdef BOOST_REGEX_RECURSIVE +# error "Can't set both BOOST_REGEX_RECURSIVE and BOOST_REGEX_NON_RECURSIVE" +# endif +# ifndef BOOST_REGEX_BLOCKSIZE +# define 
BOOST_REGEX_BLOCKSIZE 4096 +# endif +# if BOOST_REGEX_BLOCKSIZE < 512 +# error "BOOST_REGEX_BLOCKSIZE must be at least 512" +# endif +# ifndef BOOST_REGEX_MAX_BLOCKS +# define BOOST_REGEX_MAX_BLOCKS 1024 +# endif +# ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +# undef BOOST_REGEX_HAS_MS_STACK_GUARD +# endif +# ifndef BOOST_REGEX_MAX_CACHE_BLOCKS +# define BOOST_REGEX_MAX_CACHE_BLOCKS 16 +# endif +#endif + + +/***************************************************************************** + * + * helper memory allocation functions: + * + ****************************************************************************/ + +#if defined(__cplusplus) && defined(BOOST_REGEX_NON_RECURSIVE) +namespace boost{ namespace re_detail{ + +BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block(); +BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void*); + +}} /* namespaces */ +#endif + +/***************************************************************************** + * + * Diagnostics: + * + ****************************************************************************/ + +#ifdef BOOST_REGEX_CONFIG_INFO +BOOST_REGEX_DECL void BOOST_REGEX_CALL print_regex_library_info(); +#endif + +#if defined(BOOST_REGEX_DIAG) +# pragma message ("BOOST_REGEX_DECL" BOOST_STRINGIZE(=BOOST_REGEX_DECL)) +# pragma message ("BOOST_REGEX_CALL" BOOST_STRINGIZE(=BOOST_REGEX_CALL)) +# pragma message ("BOOST_REGEX_CCALL" BOOST_STRINGIZE(=BOOST_REGEX_CCALL)) +#ifdef BOOST_REGEX_USE_C_LOCALE +# pragma message ("Using C locale in regex traits class") +#elif BOOST_REGEX_USE_CPP_LOCALE +# pragma message ("Using C++ locale in regex traits class") +#else +# pragma message ("Using Win32 locale in regex traits class") +#endif +#if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# pragma message ("Dynamic linking enabled") +#endif +#if defined(BOOST_REGEX_NO_LIB) || defined(BOOST_ALL_NO_LIB) +# pragma message ("Auto-linking disabled") +#endif +#ifdef BOOST_REGEX_NO_EXTERNAL_TEMPLATES +# pragma message ("Extern 
templates disabled") +#endif + +#endif + +#endif + + + + diff --git a/ext/boost/regex/config/borland.hpp b/ext/boost/regex/config/borland.hpp new file mode 100644 index 0000000000..51c2126b8e --- /dev/null +++ b/ext/boost/regex/config/borland.hpp @@ -0,0 +1,72 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE boost/regex/config/borland.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: regex borland-specific config setup. + */ + + +#if defined(__BORLANDC__) +# if (__BORLANDC__ == 0x550) || (__BORLANDC__ == 0x551) + // problems with std::basic_string and dll RTL: +# if defined(_RTLDLL) && defined(_RWSTD_COMPILE_INSTANTIATE) +# ifdef BOOST_REGEX_BUILD_DLL +# error _RWSTD_COMPILE_INSTANTIATE must not be defined when building regex++ as a DLL +# else +# pragma message("Defining _RWSTD_COMPILE_INSTANTIATE when linking to the DLL version of the RTL may produce memory corruption problems in std::basic_string, as a result of separate versions of basic_string's static data in the RTL and you're exe/dll: be warned!!") +# endif +# endif +# ifndef _RTLDLL + // this is harmless for a staic link: +# define _RWSTD_COMPILE_INSTANTIATE +# endif + // external templates cause problems for some reason: +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +# endif +# if (__BORLANDC__ <= 0x540) && !defined(BOOST_REGEX_NO_LIB) && !defined(_NO_VCL) + // C++ Builder 4 and earlier, we can't tell whether we should be using + // the VCL runtime or not, do a static link instead: +# define BOOST_REGEX_STATIC_LINK +# endif + // + // VCL support: + // if we're building a console app then there can't be any VCL (can there?) 
+# if !defined(__CONSOLE__) && !defined(_NO_VCL) +# define BOOST_REGEX_USE_VCL +# endif + // + // if this isn't Win32 then don't automatically select link + // libraries: + // +# ifndef _Windows +# ifndef BOOST_REGEX_NO_LIB +# define BOOST_REGEX_NO_LIB +# endif +# ifndef BOOST_REGEX_STATIC_LINK +# define BOOST_REGEX_STATIC_LINK +# endif +# endif + +#if __BORLANDC__ < 0x600 +// +// string workarounds: +// +#include <cstring> +#undef strcmp +#undef strcpy +#endif + +#endif + + diff --git a/ext/boost/regex/config/cwchar.hpp b/ext/boost/regex/config/cwchar.hpp new file mode 100644 index 0000000000..a55089d0ab --- /dev/null +++ b/ext/boost/regex/config/cwchar.hpp @@ -0,0 +1,207 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE boost/regex/config/cwchar.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: regex wide character string fixes. 
+ */ + +#ifndef BOOST_REGEX_CONFIG_CWCHAR_HPP +#define BOOST_REGEX_CONFIG_CWCHAR_HPP + +#include <cwchar> +#include <cwctype> +#include <boost/config.hpp> + +#if defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER) +// apparently this is required for the RW STL on Linux: +#undef iswalnum +#undef iswalpha +#undef iswblank +#undef iswcntrl +#undef iswdigit +#undef iswgraph +#undef iswlower +#undef iswprint +#undef iswprint +#undef iswpunct +#undef iswspace +#undef iswupper +#undef iswxdigit +#undef iswctype +#undef towlower +#undef towupper +#undef towctrans +#undef wctrans +#undef wctype +#endif + +namespace std{ + +#ifndef BOOST_NO_STDC_NAMESPACE +extern "C"{ +#endif + +#ifdef iswalnum +inline int (iswalnum)(wint_t i) +{ return iswalnum(i); } +#undef iswalnum +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswalnum; +#endif + +#ifdef iswalpha +inline int (iswalpha)(wint_t i) +{ return iswalpha(i); } +#undef iswalpha +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswalpha; +#endif + +#ifdef iswcntrl +inline int (iswcntrl)(wint_t i) +{ return iswcntrl(i); } +#undef iswcntrl +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswcntrl; +#endif + +#ifdef iswdigit +inline int (iswdigit)(wint_t i) +{ return iswdigit(i); } +#undef iswdigit +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswdigit; +#endif + +#ifdef iswgraph +inline int (iswgraph)(wint_t i) +{ return iswgraph(i); } +#undef iswgraph +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswgraph; +#endif + +#ifdef iswlower +inline int (iswlower)(wint_t i) +{ return iswlower(i); } +#undef iswlower +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswlower; +#endif + +#ifdef iswprint +inline int (iswprint)(wint_t i) +{ return iswprint(i); } +#undef iswprint +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswprint; +#endif + +#ifdef iswpunct +inline int (iswpunct)(wint_t i) +{ return iswpunct(i); } +#undef iswpunct +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswpunct; +#endif + +#ifdef iswspace +inline int 
(iswspace)(wint_t i) +{ return iswspace(i); } +#undef iswspace +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswspace; +#endif + +#ifdef iswupper +inline int (iswupper)(wint_t i) +{ return iswupper(i); } +#undef iswupper +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswupper; +#endif + +#ifdef iswxdigit +inline int (iswxdigit)(wint_t i) +{ return iswxdigit(i); } +#undef iswxdigit +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswxdigit; +#endif + +#ifdef towlower +inline wint_t (towlower)(wint_t i) +{ return towlower(i); } +#undef towlower +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::towlower; +#endif + +#ifdef towupper +inline wint_t (towupper)(wint_t i) +{ return towupper(i); } +#undef towupper +#elif defined(BOOST_NO_STDC_NAMESPACE) +using :: towupper; +#endif + +#ifdef wcscmp +inline int (wcscmp)(const wchar_t *p1, const wchar_t *p2) +{ return wcscmp(p1,p2); } +#undef wcscmp +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcscmp; +#endif + +#ifdef wcscoll +inline int (wcscoll)(const wchar_t *p1, const wchar_t *p2) +{ return wcscoll(p1,p2); } +#undef wcscoll +#elif defined(BOOST_NO_STDC_NAMESPACE) && !defined(UNDER_CE) +using ::wcscoll; +#endif + +#ifdef wcscpy +inline wchar_t *(wcscpy)(wchar_t *p1, const wchar_t *p2) +{ return wcscpy(p1,p2); } +#undef wcscpy +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcscpy; +#endif + +#ifdef wcslen +inline size_t (wcslen)(const wchar_t *p) +{ return wcslen(p); } +#undef wcslen +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcslen; +#endif + +#ifdef wcsxfrm +inline size_t (wcsxfrm)(wchar_t *p1, const wchar_t *p2, size_t s) /* inline: defined in a header; parenthesised name keeps a function-like wcsxfrm macro from expanding in the declarator */ +{ return wcsxfrm(p1,p2,s); } +#undef wcsxfrm +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcsxfrm; +#endif + + +#ifndef BOOST_NO_STDC_NAMESPACE +} // extern "C" +#endif + +} // namespace std + +#endif + diff --git a/ext/boost/regex/icu.hpp b/ext/boost/regex/icu.hpp new file mode 100644 index 0000000000..247155724e --- /dev/null +++ b/ext/boost/regex/icu.hpp @@ -0,0 +1,1021 @@ +/* + * + * 
Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ + +#ifndef BOOST_REGEX_ICU_HPP +#define BOOST_REGEX_ICU_HPP + +#include <unicode/utypes.h> +#include <unicode/uchar.h> +#include <unicode/coll.h> +#include <boost/regex.hpp> +#include <boost/regex/pending/unicode_iterator.hpp> +#include <boost/mpl/int_fwd.hpp> +#include <bitset> + + +namespace boost{ + +namespace re_detail{ + +// +// Implementation details: +// +class BOOST_REGEX_DECL icu_regex_traits_implementation +{ + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector<char_type> string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef boost::uint_least32_t char_class_type; +public: + icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l) + : m_locale(l) + { + UErrorCode success = U_ZERO_ERROR; + m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL); + success = U_ZERO_ERROR; + m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY); + } + U_NAMESPACE_QUALIFIER Locale getloc()const + { + return m_locale; + } + string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const; + string_type transform(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_collator.get()); + } + string_type 
transform_primary(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_primary_collator.get()); + } +private: + void init_error() + { + std::runtime_error e("Could not initialize ICU resources"); + boost::throw_exception(e); + } + U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using + boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object + boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object +}; + +inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc) +{ + return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc)); +} + +} + +class BOOST_REGEX_DECL icu_regex_traits +{ +public: + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector<char_type> string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; +#ifdef BOOST_NO_INT64_T + typedef std::bitset<64> char_class_type; +#else + typedef boost::uint64_t char_class_type; +#endif + + struct boost_extensions_tag{}; + + icu_regex_traits() + : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale())) + { + } + static size_type length(const char_type* p); + + ::boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char; + } + ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const + { + return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char; + } + char_type translate(char_type c) const + { + return c; + } + char_type translate_nocase(char_type c) const + { + return ::u_tolower(c); + } + char_type translate(char_type c, bool icase) const + { + return icase ? 
translate_nocase(c) : translate(c); + } + char_type tolower(char_type c) const + { + return ::u_tolower(c); + } + char_type toupper(char_type c) const + { + return ::u_toupper(c); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const char_type* p1, const char_type* p2) const; + string_type lookup_collatename(const char_type* p1, const char_type* p2) const; + bool isctype(char_type c, char_class_type f) const; + int toi(const char_type*& p1, const char_type* p2, int radix)const + { + return re_detail::global_toi(p1, p2, radix, *this); + } + int value(char_type c, int radix)const + { + return u_digit(c, static_cast< ::int8_t>(radix)); + } + locale_type imbue(locale_type l) + { + locale_type result(m_pimpl->getloc()); + m_pimpl = re_detail::get_icu_regex_traits_implementation(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->getloc(); /* report the locale held by the implementation object (the one imbue() installs), not a default-constructed Locale */ + } + std::string error_string(::boost::regex_constants::error_type n) const + { + return re_detail::get_default_error_string(n); + } +private: + icu_regex_traits(const icu_regex_traits&); + icu_regex_traits& operator=(const icu_regex_traits&); + + // + // define the bitmasks offsets we need for additional character properties: + // + enum{ + offset_blank = U_CHAR_CATEGORY_COUNT, + offset_space = U_CHAR_CATEGORY_COUNT+1, + offset_xdigit = U_CHAR_CATEGORY_COUNT+2, + offset_underscore = U_CHAR_CATEGORY_COUNT+3, + offset_unicode = U_CHAR_CATEGORY_COUNT+4, + offset_any = U_CHAR_CATEGORY_COUNT+5, + offset_ascii = U_CHAR_CATEGORY_COUNT+6, + offset_horizontal = U_CHAR_CATEGORY_COUNT+7, + offset_vertical = U_CHAR_CATEGORY_COUNT+8 + }; + + // + // and now the masks: + // + static const char_class_type mask_blank; + static const char_class_type mask_space; + static const char_class_type 
mask_xdigit; + static const char_class_type mask_underscore; + static const char_class_type mask_unicode; + static const char_class_type mask_any; + static const char_class_type mask_ascii; + static const char_class_type mask_horizontal; + static const char_class_type mask_vertical; + + static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2); + + boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl; +}; + +} // namespace boost + +// +// template instances: +// +#define BOOST_REGEX_CHAR_T UChar32 +#undef BOOST_REGEX_TRAITS_T +#define BOOST_REGEX_TRAITS_T , icu_regex_traits +#define BOOST_REGEX_ICU_INSTANCES +#ifdef BOOST_REGEX_ICU_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include <boost/regex/v4/instances.hpp> +#undef BOOST_REGEX_CHAR_T +#undef BOOST_REGEX_TRAITS_T +#undef BOOST_REGEX_ICU_INSTANCES +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif + +namespace boost{ + +// types: +typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; +typedef match_results<const ::UChar32*> u32match; +typedef match_results<const ::UChar*> u16match; + +// +// Construction of 32-bit regex types from UTF-8 and UTF-16 primitives: +// +namespace re_detail{ + +#if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) +template <class InputIterator> +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<1>*) +{ + typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type; + return u32regex(conv_type(i), conv_type(j), opt); +} + +template <class InputIterator> +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<2>*) +{ + typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type; + return u32regex(conv_type(i), conv_type(j), opt); +} + +template <class InputIterator> +inline u32regex 
do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<4>*) +{ + return u32regex(i, j, opt); +} +#else +template <class InputIterator> +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<1>*) +{ + typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type; + typedef std::vector<UChar32> vector_type; + vector_type v; + conv_type a(i), b(j); + while(a != b) + { + v.push_back(*a); + ++a; + } + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt); +} + +template <class InputIterator> +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<2>*) +{ + typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type; + typedef std::vector<UChar32> vector_type; + vector_type v; + conv_type a(i), b(j); + while(a != b) + { + v.push_back(*a); + ++a; + } + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt); +} + +template <class InputIterator> +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt, + const boost::mpl::int_<4>*) +{ + typedef std::vector<UChar32> vector_type; + vector_type v; + while(i != j) + { + v.push_back((UChar32)(*i)); + ++i; + } + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt); +} +#endif +} + +// +// Construction from an iterator pair: +// +template <class InputIterator> +inline u32regex make_u32regex(InputIterator i, + InputIterator j, + boost::regex_constants::syntax_option_type opt) +{ + return re_detail::do_make_u32regex(i, j, 
opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0)); +} +// +// construction from UTF-8 nul-terminated strings: +// +inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0)); +} +inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0)); +} +// +// construction from UTF-16 nul-terminated strings: +// +#ifndef BOOST_NO_WREGEX +inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0)); +} +#endif +// +// construction from basic_string class-template: +// +template<class C, class T, class A> +inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0)); +} +// +// Construction from ICU string type: +// +inline u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0)); +} + +// +// 
regex_match overloads that widen the character type as appropriate: +// +namespace re_detail{ +template<class MR1, class MR2> +void copy_results(MR1& out, MR2 const& in) +{ + // copy results from an adapted MR2 match_results: + out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base()); + out.set_base(in.base().base()); + for(int i = 0; i < (int)in.size(); ++i) + { + if(in[i].matched) + { + out.set_first(in[i].first.base(), i); + out.set_second(in[i].second.base(), i); + } + } +} + +template <class BidiIterator, class Allocator> +inline bool do_regex_match(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + boost::mpl::int_<4> const*) +{ + return ::boost::regex_match(first, last, m, e, flags); +} +template <class BidiIterator, class Allocator> +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + boost::mpl::int_<2> const*) +{ + typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type; + typedef match_results<conv_type> match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +template <class BidiIterator, class Allocator> +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + boost::mpl::int_<1> const*) +{ + typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type; + typedef match_results<conv_type> match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +} 
// namespace re_detail + +template <class BidiIterator, class Allocator> +inline bool u32regex_match(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0)); +} +inline bool u32regex_match(const UChar* p, + match_results<const UChar*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + match_results<const wchar_t*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + match_results<const char*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + match_results<const unsigned char*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_match(const std::string& s, + match_results<std::string::const_iterator>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + match_results<std::wstring::const_iterator>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return 
re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_match(const UnicodeString& s, + match_results<const UChar*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +// +// regex_match overloads that do not return what matched: +// +template <class BidiIterator> +inline bool u32regex_match(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<BidiIterator> m; + return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0)); +} +inline bool u32regex_match(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const UChar*> m; + return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const wchar_t*> m; + return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const char*> m; + return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const unsigned char*> m; + return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_match(const std::string& s, + 
const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<std::string::const_iterator> m; + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<std::wstring::const_iterator> m; + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_match(const UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const UChar*> m; + return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} + +// +// regex_search overloads that widen the character type as appropriate: +// +namespace re_detail{ +template <class BidiIterator, class Allocator> +inline bool do_regex_search(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + boost::mpl::int_<4> const*) +{ + return ::boost::regex_search(first, last, m, e, flags, base); +} +template <class BidiIterator, class Allocator> +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + boost::mpl::int_<2> const*) +{ + typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type; + typedef match_results<conv_type> match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +template <class BidiIterator, class Allocator> +bool do_regex_search(BidiIterator 
first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + boost::mpl::int_<1> const*) +{ + typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type; + typedef match_results<conv_type> match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +} + +template <class BidiIterator, class Allocator> +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0)); +} +template <class BidiIterator, class Allocator> +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base) +{ + return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0)); +} +inline bool u32regex_search(const UChar* p, + match_results<const UChar*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + match_results<const wchar_t*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + match_results<const char*>& m, + const u32regex& e, + 
match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + match_results<const unsigned char*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_search(const std::string& s, + match_results<std::string::const_iterator>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + match_results<std::wstring::const_iterator>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_search(const UnicodeString& s, + match_results<const UChar*>& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +template <class BidiIterator> +inline bool u32regex_search(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<BidiIterator> m; + return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0)); +} +inline bool u32regex_search(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const UChar*> m; + return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && 
(U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const wchar_t*> m; + return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const char*> m; + return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const unsigned char*> m; + return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0)); +} +inline bool u32regex_search(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<std::string::const_iterator> m; + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<std::wstring::const_iterator> m; + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} +#endif +inline bool u32regex_search(const UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results<const UChar*> m; + return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); +} + +// +// overloads for regex_replace with utf-8 and utf-16 data types: +// +namespace re_detail{ +template <class I> +inline std::pair< boost::u8_to_u32_iterator<I>, 
boost::u8_to_u32_iterator<I> > + make_utf32_seq(I i, I j, mpl::int_<1> const*) +{ + return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i), boost::u8_to_u32_iterator<I>(j)); +} +template <class I> +inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> > + make_utf32_seq(I i, I j, mpl::int_<2> const*) +{ + return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i), boost::u16_to_u32_iterator<I>(j)); +} +template <class I> +inline std::pair< I, I > + make_utf32_seq(I i, I j, mpl::int_<4> const*) +{ + return std::pair< I, I >(i, j); +} +template <class charT> +inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> > + make_utf32_seq(const charT* p, mpl::int_<1> const*) +{ + return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p), boost::u8_to_u32_iterator<const charT*>(p+std::strlen((const char*)p))); +} +template <class charT> +inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> > + make_utf32_seq(const charT* p, mpl::int_<2> const*) +{ + return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p), boost::u16_to_u32_iterator<const charT*>(p+u_strlen((const UChar*)p))); +} +template <class charT> +inline std::pair< const charT*, const charT* > + make_utf32_seq(const charT* p, mpl::int_<4> const*) +{ + return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p)); +} +template <class OutputIterator> +inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*) +{ + return o; +} +template <class OutputIterator> +inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*) +{ + return o; 
+} +template <class OutputIterator> +inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*) +{ + return o; +} + +template <class OutputIterator, class I1, class I2> +OutputIterator do_regex_replace(OutputIterator out, + std::pair<I1, I1> const& in, + const u32regex& e, + const std::pair<I2, I2>& fmt, + match_flag_type flags + ) +{ + // unfortunately we have to copy the format string in order to pass in onward: + std::vector<UChar32> f; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + f.assign(fmt.first, fmt.second); +#else + f.clear(); + I2 pos = fmt.first; + while(pos != fmt.second) + f.push_back(*pos++); +#endif + + regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags); + regex_iterator<I1, UChar32, icu_regex_traits> j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(in.first, in.second, out); + } + else + { + I1 last_m = in.first; + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(i->prefix().first, i->prefix().second, out); + if(f.size()) + out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); + else + out = ::boost::re_detail::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits()); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(last_m, in.second, out); + } + return out; +} +template <class BaseIterator> +inline const BaseIterator& extract_output_base(const BaseIterator& b) +{ + return b; +} +template <class BaseIterator> +inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b) +{ + return b.base(); +} +template <class BaseIterator> +inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b) +{ + return b.base(); +} +} 
// re_detail + +template <class OutputIterator, class BidirectionalIterator, class charT> +inline OutputIterator u32regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return re_detail::extract_output_base +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1300) + <OutputIterator> +#endif + ( + re_detail::do_regex_replace( + re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)), + re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)), + e, + re_detail::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)), + flags) + ); +} + +template <class OutputIterator, class Iterator, class charT> +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const std::basic_string<charT>& fmt, + match_flag_type flags = match_default) +{ + return re_detail::extract_output_base +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1300) + <OutputIterator> +#endif + ( + re_detail::do_regex_replace( + re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)), + re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)), + e, + re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)), + flags) + ); +} + +template <class OutputIterator, class Iterator> +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const UnicodeString& fmt, + match_flag_type flags = match_default) +{ + return re_detail::extract_output_base +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1300) + <OutputIterator> +#endif + ( + re_detail::do_regex_replace( + re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)), + re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)), + e, + 
re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)), + flags) + ); +} + +template <class charT> +std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + std::basic_string<charT> result; + re_detail::string_out_iterator<std::basic_string<charT> > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +template <class charT> +std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s, + const u32regex& e, + const std::basic_string<charT>& fmt, + match_flag_type flags = match_default) +{ + std::basic_string<charT> result; + re_detail::string_out_iterator<std::basic_string<charT> > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; +} + +namespace re_detail{ + +class unicode_string_out_iterator +{ + UnicodeString* out; +public: + unicode_string_out_iterator(UnicodeString& s) : out(&s) {} + unicode_string_out_iterator& operator++() { return *this; } + unicode_string_out_iterator& operator++(int) { return *this; } + unicode_string_out_iterator& operator*() { return *this; } + unicode_string_out_iterator& operator=(UChar v) + { + *out += v; + return *this; + } + typedef std::ptrdiff_t difference_type; + typedef UChar value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +} + +inline UnicodeString u32regex_replace(const UnicodeString& s, + const u32regex& e, + const UChar* fmt, + match_flag_type flags = match_default) +{ + UnicodeString result; + re_detail::unicode_string_out_iterator i(result); + u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags); + return result; +} + +inline UnicodeString u32regex_replace(const UnicodeString& s, + const u32regex& e, + const UnicodeString& fmt, + match_flag_type flags = 
match_default) +{ + UnicodeString result; + re_detail::unicode_string_out_iterator i(result); + re_detail::do_regex_replace( + re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)), + re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast<mpl::int_<2> const*>(0)), + e, + re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)), + flags); + return result; +} + +} // namespace boost. + +#include <boost/regex/v4/u32regex_iterator.hpp> +#include <boost/regex/v4/u32regex_token_iterator.hpp> + +#endif diff --git a/ext/boost/regex/mfc.hpp b/ext/boost/regex/mfc.hpp new file mode 100644 index 0000000000..02502f9504 --- /dev/null +++ b/ext/boost/regex/mfc.hpp @@ -0,0 +1,190 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE mfc.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Overloads and helpers for using MFC/ATL string types with Boost.Regex. 
  */

#ifndef BOOST_REGEX_MFC_HPP
#define BOOST_REGEX_MFC_HPP

#include <atlsimpstr.h>
#include <boost/regex.hpp>

namespace boost{

//
// define the types used for TCHAR's:
typedef basic_regex<TCHAR> tregex;
typedef match_results<TCHAR const*> tmatch;
typedef regex_iterator<TCHAR const*> tregex_iterator;
typedef regex_token_iterator<TCHAR const*> tregex_token_iterator;

//
// SIMPLE_STRING_PARAM / SIMPLE_STRING_ARG_LIST:
// The template parameter list and argument list used to name
// ATL::CSimpleStringT below; a second bool parameter is added when
// _MSC_VER >= 1310.  Note that these macros are not #undef'd here.
//
#if _MSC_VER >= 1310
#define SIMPLE_STRING_PARAM class B, bool b
#define SIMPLE_STRING_ARG_LIST B, b
#else
#define SIMPLE_STRING_PARAM class B
#define SIMPLE_STRING_ARG_LIST B
#endif

//
// define regex creation functions:
//
// make_regex: builds a basic_regex<B> from the characters
// [GetString(), GetString() + GetLength()) of s, with syntax flags f.
//
template <SIMPLE_STRING_PARAM>
inline basic_regex<B> 
make_regex(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s, ::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal)
{
   basic_regex<B> result(s.GetString(), s.GetString() + s.GetLength(), f);
   return result;
}
//
// regex_match overloads:
// Both forward the string's character range to ::boost::regex_match;
// the first also fills in "what".
//
template <SIMPLE_STRING_PARAM, class A, class T>
inline bool regex_match(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s,
                 match_results<const B*, A>& what,
                 const basic_regex<B, T>& e,
                 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   return ::boost::regex_match(s.GetString(),
                               s.GetString() + s.GetLength(),
                               what,
                               e,
                               f);
}

template <SIMPLE_STRING_PARAM, class T>
inline bool regex_match(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s,
                 const basic_regex<B, T>& e,
                 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   return ::boost::regex_match(s.GetString(),
                               s.GetString() + s.GetLength(),
                               e,
                               f);
}
//
// regex_search overloads:
// Same shape as the regex_match overloads above, forwarding to
// ::boost::regex_search.
//
template <SIMPLE_STRING_PARAM, class A, class T>
inline bool regex_search(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s,
                 match_results<const B*, A>& what,
                 const basic_regex<B, T>& e,
                 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   return ::boost::regex_search(s.GetString(),
                               s.GetString() + s.GetLength(),
                               what,
                               e,
                               f);
}

template <SIMPLE_STRING_PARAM, class T>
inline bool regex_search(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s,
                 const basic_regex<B, T>& e,
                 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   return ::boost::regex_search(s.GetString(),
                               s.GetString() + s.GetLength(),
                               e,
                               f);
}
//
// regex_iterator creation:
// The returned iterators hold raw pointers into s's character buffer.
// NOTE(review): presumably s must stay alive and unmodified while the
// iterator is in use - confirm against CSimpleStringT's buffer rules.
//
template <SIMPLE_STRING_PARAM>
inline regex_iterator<B const*> 
make_regex_iterator(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s, const basic_regex<B>& e, ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   regex_iterator<B const*> result(s.GetString(), s.GetString() + s.GetLength(), e, f);
   return result;
}

// token iterator over a single sub-expression index (default 0):
template <SIMPLE_STRING_PARAM>
inline regex_token_iterator<B const*> 
   make_regex_token_iterator(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s, const basic_regex<B>& e, int sub = 0, ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   regex_token_iterator<B const*> result(s.GetString(), s.GetString() + s.GetLength(), e, sub, f);
   return result;
}

// token iterator over a vector of sub-expression indexes:
template <SIMPLE_STRING_PARAM>
inline regex_token_iterator<B const*> 
make_regex_token_iterator(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s, const basic_regex<B>& e, const std::vector<int>& subs, ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   regex_token_iterator<B const*> result(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);
   return result;
}

// token iterator over a built-in array of sub-expression indexes:
template <SIMPLE_STRING_PARAM, std::size_t N>
inline regex_token_iterator<B const*> 
make_regex_token_iterator(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s, const basic_regex<B>& e, const int (& subs)[N], ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
{
   regex_token_iterator<B const*> result(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);
   return result;
}

//
// regex_replace (iterator form):
// Forwards to ::boost::regex_replace with the format string passed as
// the pointer returned by fmt.GetString().
//
template <class OutputIterator, class BidirectionalIterator, class traits,
          SIMPLE_STRING_PARAM>
OutputIterator regex_replace(OutputIterator out,
                           BidirectionalIterator first,
                           BidirectionalIterator last,
                           const basic_regex<B, traits>& e,
                           const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& fmt,
                           match_flag_type flags = match_default)
{
   return ::boost::regex_replace(out, first, last, e, fmt.GetString(), flags);
}

namespace re_detail{

//
// mfc_string_out_iterator:
// Output iterator that calls AppendChar on the CSimpleStringT it was
// constructed from for each character assigned through it.  Increment
// and dereference are no-ops that return the iterator itself.
//
template <SIMPLE_STRING_PARAM>
class mfc_string_out_iterator
{
   ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>* out;
public:
   mfc_string_out_iterator(ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s) : out(&s) {}
   mfc_string_out_iterator& operator++() { return *this; }
   mfc_string_out_iterator& operator++(int) { return *this; }
   mfc_string_out_iterator& operator*() { return *this; }
   mfc_string_out_iterator& operator=(B v) 
   { 
      out->AppendChar(v); 
      return *this; 
   }
   typedef std::ptrdiff_t difference_type;
   typedef B value_type;
   typedef value_type* pointer;
   typedef value_type& reference;
   typedef std::output_iterator_tag iterator_category;
};

}

//
// regex_replace (string form):
// Returns a new string, constructed with s's manager, holding s with the
// replacements applied.
//
template <class traits, SIMPLE_STRING_PARAM>
ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST> regex_replace(const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& s,
                            const basic_regex<B, traits>& e,
                            const ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST>& fmt,
                            match_flag_type flags = match_default)
{
   ATL::CSimpleStringT<SIMPLE_STRING_ARG_LIST> result(s.GetManager());
   re_detail::mfc_string_out_iterator<SIMPLE_STRING_ARG_LIST> i(result);
   regex_replace(i, s.GetString(), s.GetString() + s.GetLength(), e, fmt.GetString(), flags);
   return result;
}

} // namespace boost.

#endif
diff --git a/ext/boost/regex/pattern_except.hpp b/ext/boost/regex/pattern_except.hpp
new file mode 100644
index 0000000000..57ea14c256
--- /dev/null
+++ b/ext/boost/regex/pattern_except.hpp
@@ -0,0 +1,100 @@
/*
 *
 * Copyright (c) 1998-2002
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         pattern_except.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Declares pattern-matching exception classes.
  */

#ifndef BOOST_RE_PAT_EXCEPT_HPP
#define BOOST_RE_PAT_EXCEPT_HPP

#ifndef BOOST_REGEX_CONFIG_HPP
#include <boost/regex/config.hpp>
#endif

#include <stdexcept>
#include <cstddef>
#include <boost/regex/v4/error_type.hpp>

namespace boost{

// The ABI prefix header is included with MSVC warning 4103 disabled
// around it; the matching suffix header is included at the end of the file.
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

// MSVC warning 4275 is disabled for the class declaration below and
// restored after namespace re_detail.
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable : 4275)
#endif
//
// regex_error:
// Exception type derived from std::runtime_error that additionally
// carries an error code and a position, readable through code() and
// position().  The constructors, destructor and raise() are declared
// here only; their definitions are not in this header.
//
class BOOST_REGEX_DECL regex_error : public std::runtime_error
{
public:
   explicit regex_error(const std::string& s, regex_constants::error_type err = regex_constants::error_unknown, std::ptrdiff_t pos = 0);
   explicit regex_error(regex_constants::error_type err);
   ~regex_error() throw();
   // the error code this exception was constructed with:
   regex_constants::error_type code()const
   { return m_error_code; }
   // the stored position value:
   // NOTE(review): presumably an offset into the expression text - confirm
   // against the constructor definitions.
   std::ptrdiff_t position()const
   { return m_position; }
   void raise()const;
private:
   regex_constants::error_type m_error_code;
   std::ptrdiff_t m_position;
};

// alternative names for regex_error:
typedef regex_error bad_pattern;
typedef regex_error bad_expression;

namespace re_detail{

// declared here, defined elsewhere:
BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex);

//
// raise_error:
// Builds a std::runtime_error whose message is t.error_string(code) and
// hands it to raise_runtime_error.  Note that the object passed on is a
// plain std::runtime_error, not a regex_error, so "code" itself is not
// carried by it.
//
template <class traits>
void raise_error(const traits& t, regex_constants::error_type code)
{
   (void)t;  // warning suppression
   std::runtime_error e(t.error_string(code));
   ::boost::re_detail::raise_runtime_error(e);
}

}

#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

} // namespace boost

#endif



diff --git a/ext/boost/regex/pending/object_cache.hpp b/ext/boost/regex/pending/object_cache.hpp
new file mode 100644
index 0000000000..2a7e00bc0a
--- /dev/null
+++ b/ext/boost/regex/pending/object_cache.hpp
@@ -0,0 +1,163 @@
/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */

 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         object_cache.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Implements a generic object cache.
+ */ + +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#define BOOST_REGEX_OBJECT_CACHE_HPP + +#include <map> +#include <list> +#include <stdexcept> +#include <string> +#include <boost/config.hpp> +#include <boost/shared_ptr.hpp> +#ifdef BOOST_HAS_THREADS +#include <boost/regex/pending/static_mutex.hpp> +#endif + +namespace boost{ + +template <class Key, class Object> +class object_cache +{ +public: + typedef std::pair< ::boost::shared_ptr<Object const>, Key const*> value_type; + typedef std::list<value_type> list_type; + typedef typename list_type::iterator list_iterator; + typedef std::map<Key, list_iterator> map_type; + typedef typename map_type::iterator map_iterator; + typedef typename list_type::size_type size_type; + static boost::shared_ptr<Object const> get(const Key& k, size_type max_cache_size); + +private: + static boost::shared_ptr<Object const> do_get(const Key& k, size_type max_cache_size); + + struct data + { + list_type cont; + map_type index; + }; + + // Needed by compilers not implementing the resolution to DR45. For reference, + // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. + friend struct data; +}; + +template <class Key, class Object> +boost::shared_ptr<Object const> object_cache<Key, Object>::get(const Key& k, size_type max_cache_size) +{ +#ifdef BOOST_HAS_THREADS + static boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT; + + boost::static_mutex::scoped_lock l(mut); + if(l) + { + return do_get(k, max_cache_size); + } + // + // what do we do if the lock fails? + // for now just throw, but we should never really get here... 
+ // + ::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); + return boost::shared_ptr<Object>(); +#else + return do_get(k, max_cache_size); +#endif +} + +template <class Key, class Object> +boost::shared_ptr<Object const> object_cache<Key, Object>::do_get(const Key& k, size_type max_cache_size) +{ + typedef typename object_cache<Key, Object>::data object_data; + typedef typename map_type::size_type map_size_type; + static object_data s_data; + + // + // see if the object is already in the cache: + // + map_iterator mpos = s_data.index.find(k); + if(mpos != s_data.index.end()) + { + // + // Eureka! + // We have a cached item, bump it up the list and return it: + // + if(--(s_data.cont.end()) != mpos->second) + { + // splice out the item we want to move: + list_type temp; + temp.splice(temp.end(), s_data.cont, mpos->second); + // and now place it at the end of the list: + s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); + BOOST_ASSERT(*(s_data.cont.back().second) == k); + // update index with new position: + mpos->second = --(s_data.cont.end()); + BOOST_ASSERT(&(mpos->first) == mpos->second->second); + BOOST_ASSERT(&(mpos->first) == s_data.cont.back().second); + } + return s_data.cont.back().first; + } + // + // if we get here then the item is not in the cache, + // so create it: + // + boost::shared_ptr<Object const> result(new Object(k)); + // + // Add it to the list, and index it: + // + s_data.cont.push_back(value_type(result, static_cast<Key const*>(0))); + s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); + s_data.cont.back().second = &(s_data.index.find(k)->first); + map_size_type s = s_data.index.size(); + BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_ASSERT(s_data.index.find(k)->first == k); + if(s > max_cache_size) + { + // + // We have too many items in the list, so we need to start + 
// popping them off the back of the list, but only if they're + // being held uniquely by us: + // + list_iterator pos = s_data.cont.begin(); + list_iterator last = s_data.cont.end(); + while((pos != last) && (s > max_cache_size)) + { + if(pos->first.unique()) + { + list_iterator condemmed(pos); + ++pos; + // now remove the items from our containers, + // then order has to be as follows: + BOOST_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); + s_data.index.erase(*(condemmed->second)); + s_data.cont.erase(condemmed); + --s; + } + else + --pos; + } + BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_ASSERT(s_data.index.find(k)->first == k); + } + return result; +} + +} + +#endif diff --git a/ext/boost/regex/pending/static_mutex.hpp b/ext/boost/regex/pending/static_mutex.hpp new file mode 100644 index 0000000000..218169c3c8 --- /dev/null +++ b/ext/boost/regex/pending/static_mutex.hpp @@ -0,0 +1,184 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE static_mutex.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares static_mutex lock type, there are three different + * implementations: POSIX pthreads, WIN32 threads, and portable, + * these are described in more detail below. + */ + +#ifndef BOOST_REGEX_STATIC_MUTEX_HPP +#define BOOST_REGEX_STATIC_MUTEX_HPP + +#include <boost/config.hpp> +#include <boost/regex/config.hpp> // dll import/export options. 
+ +#ifdef BOOST_HAS_PTHREADS +#include <pthread.h> +#endif + +#if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER) +// +// pthreads version: +// simple wrap around a pthread_mutex_t initialized with +// PTHREAD_MUTEX_INITIALIZER. +// +namespace boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; + +class static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + pthread_mutex_t m_mutex; +}; + +#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + inline bool locked()const + { + return m_have_lock; + } + inline operator void const*()const + { + return locked() ? this : 0; + } + void lock(); + void unlock(); +private: + static_mutex& m_mutex; + bool m_have_lock; +}; + + +} // namespace boost +#elif defined(BOOST_HAS_WINTHREADS) +// +// Win32 version: +// Use a 32-bit int as a lock, along with a test-and-set +// implementation using InterlockedCompareExchange. +// + +#include <boost/cstdint.hpp> + +namespace boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; + +class static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + boost::int32_t m_mutex; +}; + +#define BOOST_STATIC_MUTEX_INIT { 0, } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + operator void const*()const; + bool locked()const; + void lock(); + void unlock(); +private: + static_mutex& m_mutex; + bool m_have_lock; + scoped_static_mutex_lock(const scoped_static_mutex_lock&); + scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&); +}; + +inline scoped_static_mutex_lock::operator void const*()const +{ + return locked() ? 
this : 0; +} + +inline bool scoped_static_mutex_lock::locked()const +{ + return m_have_lock; +} + +} // namespace + +#else +// +// Portable version of a static mutex based on Boost.Thread library: +// This has to use a single mutex shared by all instances of static_mutex +// because boost::call_once doesn't alow us to pass instance information +// down to the initialisation proceedure. In fact the initialisation routine +// may need to be called more than once - but only once per instance. +// +// Since this preprocessor path is almost never taken, we hide these header +// dependencies so that build tools don't find them. +// +#define B1 <boost/thread/once.hpp> +#define B2 <boost/thread/recursive_mutex.hpp> +#include B1 +#include B2 +#undef B1 +#undef B2 + +namespace boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; +extern "C" BOOST_REGEX_DECL void free_static_mutex(); + +class BOOST_REGEX_DECL static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + static void init(); + static boost::recursive_mutex* m_pmutex; + static boost::once_flag m_once; +}; + +#define BOOST_STATIC_MUTEX_INIT { } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + operator void const*()const; + bool locked()const; + void lock(); + void unlock(); +private: + boost::recursive_mutex::scoped_lock* m_plock; + bool m_have_lock; +}; + +inline scoped_static_mutex_lock::operator void const*()const +{ + return locked() ? 
this : 0; +} + +inline bool scoped_static_mutex_lock::locked()const +{ + return m_have_lock; +} + +} // namespace + +#endif + +#endif diff --git a/ext/boost/regex/pending/unicode_iterator.hpp b/ext/boost/regex/pending/unicode_iterator.hpp new file mode 100644 index 0000000000..657ca0a4cf --- /dev/null +++ b/ext/boost/regex/pending/unicode_iterator.hpp @@ -0,0 +1,692 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE unicode_iterator.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. + */ + +/**************************************************************************** + +Contents: +~~~~~~~~~ + +1) Read Only, Input Adapters: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template <class BaseIterator, class U8Type = ::boost::uint8_t> +class u32_to_u8_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. + +template <class BaseIterator, class U32Type = ::boost::uint32_t> +class u8_to_u32_iterator; + +Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. + +template <class BaseIterator, class U16Type = ::boost::uint16_t> +class u32_to_u16_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. + +template <class BaseIterator, class U32Type = ::boost::uint32_t> +class u16_to_u32_iterator; + +Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. + +2) Single pass output iterator adapters: + +template <class BaseIterator> +class utf8_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-8 code points. 
//
// is_surrogate:
// Returns true when v lies in the UTF-16 surrogate range 0xD800-0xDFFF.
// Every bit above the low eleven takes part in the comparison, so a
// value wider than 16 bits (for example the UTF-32 code point U+1D800)
// is not reported as a surrogate just because its low 16 bits happen
// to fall inside that range.
//
template <class T>
inline bool is_surrogate(T v)
{
   return (v & 0xFFFFF800u) == 0xd800;
}
4 : result); +} + +inline unsigned utf8_trailing_byte_count(boost::uint8_t c) +{ + return utf8_byte_count(c) - 1; +} + +inline void invalid_utf32_code_point(::boost::uint32_t val) +{ +#ifndef BOOST_NO_STD_LOCALE + std::stringstream ss; + ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; + std::out_of_range e(ss.str()); +#else + std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence"); +#endif + boost::throw_exception(e); +} + + +} // namespace detail + +template <class BaseIterator, class U16Type = ::boost::uint16_t> +class u32_to_u16_iterator + : public boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> +{ + typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type; + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); + BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_current == 2) + extract_current(); + return m_values[m_current]; + } + bool equal(const u32_to_u16_iterator& that)const + { + if(m_position == that.m_position) + { + // Both m_currents must be equal, or both even + // this is the same as saying their sum must be even: + return (m_current + that.m_current) & 1u ? 
false : true; + } + return false; + } + void increment() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 2) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 2; + ++m_position; + } + } + void decrement() + { + if(m_current != 1) + { + // decrementing an iterator always leads to a valid position: + --m_position; + extract_current(); + m_current = m_values[1] ? 1 : 0; + } + else + { + m_current = 0; + } + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u16_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } + u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } +private: + + void extract_current()const + { + // begin by checking for a code point out of range: + ::boost::uint32_t v = *m_position; + if(v >= 0x10000u) + { + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(*m_position); + // split into two surrogates: + m_values[0] = static_cast<U16Type>(v >> 10) + detail::high_surrogate_base; + m_values[1] = static_cast<U16Type>(v & detail::ten_bit_mask) + detail::low_surrogate_base; + m_current = 0; + BOOST_ASSERT(detail::is_high_surrogate(m_values[0])); + BOOST_ASSERT(detail::is_low_surrogate(m_values[1])); + } + else + { + // 16-bit code point: + m_values[0] = static_cast<U16Type>(*m_position); + m_values[1] = 0; + m_current = 0; + // value must not be a surrogate: + if(detail::is_surrogate(m_values[0])) + detail::invalid_utf32_code_point(*m_position); + } + } + BaseIterator m_position; + mutable U16Type m_values[3]; + mutable unsigned m_current; +}; + +template <class BaseIterator, class U32Type = ::boost::uint32_t> +class u16_to_u32_iterator + : public 
boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> +{ + typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; + // special values for pending iterator reads: + BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16); + BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool equal(const u16_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + void increment() + { + // skip high surrogate first if there is one: + if(detail::is_high_surrogate(*m_position)) ++m_position; + ++m_position; + m_value = pending_read; + } + void decrement() + { + --m_position; + // if we have a low surrogate then go back one more: + if(detail::is_low_surrogate(*m_position)) + --m_position; + m_value = pending_read; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u16_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u16_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } +private: + static void invalid_code_point(::boost::uint16_t val) + { +#ifndef BOOST_NO_STD_LOCALE + std::stringstream ss; + ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; + std::out_of_range e(ss.str()); +#else + std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence"); +#endif + boost::throw_exception(e); + } + void extract_current()const + 
{ + m_value = static_cast<U32Type>(static_cast< ::boost::uint16_t>(*m_position)); + // if the last value is a high surrogate then adjust m_position and m_value as needed: + if(detail::is_high_surrogate(*m_position)) + { + // precondition; next value must have be a low-surrogate: + BaseIterator next(m_position); + ::boost::uint16_t t = *++next; + if((t & 0xFC00u) != 0xDC00u) + invalid_code_point(t); + m_value = (m_value - detail::high_surrogate_base) << 10; + m_value |= (static_cast<U32Type>(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask); + } + // postcondition; result must not be a surrogate: + if(detail::is_surrogate(m_value)) + invalid_code_point(static_cast< ::boost::uint16_t>(m_value)); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template <class BaseIterator, class U8Type = ::boost::uint8_t> +class u32_to_u8_iterator + : public boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> +{ + typedef boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type; + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); + BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_current == 4) + extract_current(); + return m_values[m_current]; + } + bool equal(const u32_to_u8_iterator& that)const + { + if(m_position == that.m_position) + { + // either the m_current's must be equal, or one must be 0 and + // the other 4: which means neither must have bits 1 or 2 set: + return (m_current == that.m_current) + || (((m_current | that.m_current) & 3) == 0); + } + return false; + } + void increment() + { + // if we have a pending read then read now, 
so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 4) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 4; + ++m_position; + } + } + void decrement() + { + if((m_current & 3) == 0) + { + --m_position; + extract_current(); + m_current = 3; + while(m_current && (m_values[m_current] == 0)) + --m_current; + } + else + --m_current; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u8_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } + u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } +private: + + void extract_current()const + { + boost::uint32_t c = *m_position; + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + m_values[0] = static_cast<unsigned char>(c); + m_values[1] = static_cast<unsigned char>(0u); + m_values[2] = static_cast<unsigned char>(0u); + m_values[3] = static_cast<unsigned char>(0u); + } + else if(c < 0x800u) + { + m_values[0] = static_cast<unsigned char>(0xC0u + (c >> 6)); + m_values[1] = static_cast<unsigned char>(0x80u + (c & 0x3Fu)); + m_values[2] = static_cast<unsigned char>(0u); + m_values[3] = static_cast<unsigned char>(0u); + } + else if(c < 0x10000u) + { + m_values[0] = static_cast<unsigned char>(0xE0u + (c >> 12)); + m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu)); + m_values[2] = static_cast<unsigned char>(0x80u + (c & 0x3Fu)); + m_values[3] = static_cast<unsigned char>(0u); + } + else + { + m_values[0] = static_cast<unsigned char>(0xF0u + (c >> 18)); + m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu)); + m_values[2] = static_cast<unsigned 
char>(0x80u + ((c >> 6) & 0x3Fu)); + m_values[3] = static_cast<unsigned char>(0x80u + (c & 0x3Fu)); + } + m_current= 0; + } + BaseIterator m_position; + mutable U8Type m_values[5]; + mutable unsigned m_current; +}; + +template <class BaseIterator, class U32Type = ::boost::uint32_t> +class u8_to_u32_iterator + : public boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> +{ + typedef boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; + // special values for pending iterator reads: + BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8); + BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool equal(const u8_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + void increment() + { + // skip high surrogate first if there is one: + unsigned c = detail::utf8_byte_count(*m_position); + std::advance(m_position, c); + m_value = pending_read; + } + void decrement() + { + // Keep backtracking until we don't have a trailing character: + unsigned count = 0; + while((*--m_position & 0xC0u) == 0x80u) ++count; + // now check that the sequence was valid: + if(count != detail::utf8_trailing_byte_count(*m_position)) + invalid_sequnce(); + m_value = pending_read; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u8_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u8_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } +private: + 
static void invalid_sequnce() + { + std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); + boost::throw_exception(e); + } + void extract_current()const + { + m_value = static_cast<U32Type>(static_cast< ::boost::uint8_t>(*m_position)); + // we must not have a continuation character: + if((m_value & 0xC0u) == 0x80u) + invalid_sequnce(); + // see how many extra byts we have: + unsigned extra = detail::utf8_trailing_byte_count(*m_position); + // extract the extra bits, 6 from each extra byte: + BaseIterator next(m_position); + for(unsigned c = 0; c < extra; ++c) + { + ++next; + m_value <<= 6; + m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu; + } + // we now need to remove a few of the leftmost bits, but how many depends + // upon how many extra bytes we've extracted: + static const boost::uint32_t masks[4] = + { + 0x7Fu, + 0x7FFu, + 0xFFFFu, + 0x1FFFFFu, + }; + m_value &= masks[extra]; + // check the result: + if(m_value > static_cast<U32Type>(0x10FFFFu)) + invalid_sequnce(); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template <class BaseIterator> +class utf16_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef boost::uint32_t* pointer; + typedef boost::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf16_output_iterator(const BaseIterator& b) + : m_position(b){} + utf16_output_iterator(const utf16_output_iterator& that) + : m_position(that.m_position){} + utf16_output_iterator& operator=(const utf16_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf16_output_iterator& operator*()const + { + return *this; + } + void operator=(boost::uint32_t val)const + { + push(val); + } + utf16_output_iterator& operator++() + { + return *this; + } + utf16_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void 
push(boost::uint32_t v)const + { + if(v >= 0x10000u) + { + // begin by checking for a code point out of range: + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(v); + // split into two surrogates: + *m_position++ = static_cast<boost::uint16_t>(v >> 10) + detail::high_surrogate_base; + *m_position++ = static_cast<boost::uint16_t>(v & detail::ten_bit_mask) + detail::low_surrogate_base; + } + else + { + // 16-bit code point: + // value must not be a surrogate: + if(detail::is_surrogate(v)) + detail::invalid_utf32_code_point(v); + *m_position++ = static_cast<boost::uint16_t>(v); + } + } + mutable BaseIterator m_position; +}; + +template <class BaseIterator> +class utf8_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef boost::uint32_t* pointer; + typedef boost::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf8_output_iterator(const BaseIterator& b) + : m_position(b){} + utf8_output_iterator(const utf8_output_iterator& that) + : m_position(that.m_position){} + utf8_output_iterator& operator=(const utf8_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf8_output_iterator& operator*()const + { + return *this; + } + void operator=(boost::uint32_t val)const + { + push(val); + } + utf8_output_iterator& operator++() + { + return *this; + } + utf8_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(boost::uint32_t c)const + { + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + *m_position++ = static_cast<unsigned char>(c); + } + else if(c < 0x800u) + { + *m_position++ = static_cast<unsigned char>(0xC0u + (c >> 6)); + *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu)); + } + else if(c < 0x10000u) + { + *m_position++ = static_cast<unsigned char>(0xE0u + (c >> 12)); + *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 
0x3Fu)); + *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu)); + } + else + { + *m_position++ = static_cast<unsigned char>(0xF0u + (c >> 18)); + *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu)); + *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu)); + } + } + mutable BaseIterator m_position; +}; + +} // namespace boost + +#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP + diff --git a/ext/boost/regex/regex_traits.hpp b/ext/boost/regex/regex_traits.hpp new file mode 100644 index 0000000000..730ba6e0d8 --- /dev/null +++ b/ext/boost/regex/regex_traits.hpp @@ -0,0 +1,35 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression traits classes. + */ + +#ifndef BOOST_REGEX_TRAITS_HPP +#define BOOST_REGEX_TRAITS_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +# include <boost/regex/config.hpp> +#endif + +# ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +# include <boost/regex/v4/regex_traits.hpp> +# endif + +#endif // include + + + + + diff --git a/ext/boost/regex/user.hpp b/ext/boost/regex/user.hpp new file mode 100644 index 0000000000..95908173d7 --- /dev/null +++ b/ext/boost/regex/user.hpp @@ -0,0 +1,90 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. 
+ * FILE user.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: User settable options. + */ + +// define if you want the regex library to use the C locale +// even on Win32: +// #define BOOST_REGEX_USE_C_LOCALE + +// define this if you want the regex library to use the C++ +// locale: +// #define BOOST_REGEX_USE_CPP_LOCALE + +// define this if the runtime library is a dll, and you +// want BOOST_REGEX_DYN_LINK to set up dll exports/imports +// with __declspec(dllexport)/__declspec(dllimport). +// #define BOOST_REGEX_HAS_DLL_RUNTIME + +// define this if you want to dynamically link to regex, +// if the runtime library is also a dll (Probably Win32 specific, +// and has no effect unless BOOST_REGEX_HAS_DLL_RUNTIME is set): +// #define BOOST_REGEX_DYN_LINK + +// define this if you don't want the lib to automatically +// select its link libraries: +// #define BOOST_REGEX_NO_LIB + +// define this if templates with switch statements cause problems: +// #define BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE + +// define this to disable Win32 support when available: +// #define BOOST_REGEX_NO_W32 + +// define this if bool is not a real type: +// #define BOOST_REGEX_NO_BOOL + +// define this if no template instances are to be placed in +// the library rather than users object files: +// #define BOOST_REGEX_NO_EXTERNAL_TEMPLATES + +// define this if the forward declarations in regex_fwd.hpp +// cause more problems than they are worth: +// #define BOOST_REGEX_NO_FWD + +// define this if your compiler supports MS Windows structured +// exception handling. +// #define BOOST_REGEX_HAS_MS_STACK_GUARD + +// define this if you want to use the recursive algorithm +// even if BOOST_REGEX_HAS_MS_STACK_GUARD is not defined. +// #define BOOST_REGEX_RECURSIVE + +// define this if you want to use the non-recursive +// algorithm, even if the recursive version would be the default.
+// #define BOOST_REGEX_NON_RECURSIVE + +// define this if you want to set the size of the memory blocks +// used by the non-recursive algorithm. +// #define BOOST_REGEX_BLOCKSIZE 4096 + +// define this if you want to set the maximum number of memory blocks +// used by the non-recursive algorithm. +// #define BOOST_REGEX_MAX_BLOCKS 1024 + +// define this if you want to set the maximum number of memory blocks +// cached by the non-recursive algorithm: Normally this is 16, but can be +// higher if you have multiple threads all using boost.regex, or lower +// if you don't want boost.regex to cache memory. +// #define BOOST_REGEX_MAX_CACHE_BLOCKS 16 + +// define this if you want to be able to access extended capture +// information in your sub_match's (caution this will slow things +// down quite a bit). +// #define BOOST_REGEX_MATCH_EXTRA + +// define this if you want to enable support for Unicode via ICU. +// #define BOOST_HAS_ICU diff --git a/ext/boost/regex/v4/basic_regex.hpp b/ext/boost/regex/v4/basic_regex.hpp new file mode 100644 index 0000000000..09b0467b40 --- /dev/null +++ b/ext/boost/regex/v4/basic_regex.hpp @@ -0,0 +1,825 @@ +/* + * + * Copyright (c) 1998-2004 + * John Maddock + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org/ for most recent version. + * FILE basic_regex.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares template class basic_regex. 
//
// bubble_down_one:
// Given a range [first, last) whose elements other than the last one are
// already ordered, moves the last element towards the front by swapping
// it with its predecessor for as long as the predecessor does not compare
// less than it.  An element equal to its predecessor is therefore moved
// in front of it.  Elements are exchanged with their member swap().
// An empty range is left alone.
//
template <class I>
void bubble_down_one(I first, I last)
{
   if(first == last)
      return;
   for(I cur = last - 1; cur != first; --cur)
   {
      I prev = cur - 1;
      // stop as soon as the predecessor is strictly smaller:
      if(*prev < *cur)
         break;
      prev->swap(*cur);
   }
}
bool operator == (const name& other)const + { + return hash == other.hash; //n == other.n; + } + void swap(name& other) + { + //n.swap(other.n); + std::swap(index, other.index); + std::swap(hash, other.hash); + } + }; +public: + named_subexpressions(){} + void set_name(const charT* i, const charT* j, int index) + { + m_sub_names.push_back(name(i, j, index)); + bubble_down_one(m_sub_names.begin(), m_sub_names.end()); + } + int get_id(const charT* i, const charT* j)const + { + name t(i, j, 0); + typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + int get_id(std::size_t h)const + { + name t(h, 0); + typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } +private: + std::vector<name> m_sub_names; +}; + +template <class charT, class Other> +class named_subexpressions_converter : public named_subexpressions_base<charT> +{ + boost::shared_ptr<named_subexpressions<Other> > m_converter; +public: + named_subexpressions_converter(boost::shared_ptr<named_subexpressions<Other> > s) + : m_converter(s) {} + int get_id(const charT* i, const charT* j)const + { + if(i == j) + return -1; + std::vector<Other> v; + while(i != j) + { + v.push_back(*i); + ++i; + } + return m_converter->get_id(&v[0], &v[0] + v.size()); + } + int get_id(std::size_t h)const + { + return m_converter->get_id(h); + } +}; + +template <class To> +inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp( + boost::shared_ptr<named_subexpressions<To> > s, + boost::integral_constant<bool,true> const&) +{ + return s; +} +template <class To, class From> +inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp( + boost::shared_ptr<named_subexpressions<From> > s, + 
boost::integral_constant<bool,false> const&) +{ + return boost::shared_ptr<named_subexpressions_converter<To, From> >(new named_subexpressions_converter<To, From>(s)); +} +template <class To, class From> +inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs( + boost::shared_ptr<named_subexpressions<From> > s) +{ + typedef typename boost::is_same<To, From>::type tag_type; + return convert_to_named_subs_imp<To>(s, tag_type()); +} +// +// class regex_data: +// represents the data we wish to expose to the matching algorithms. +// +template <class charT, class traits> +struct regex_data : public named_subexpressions<charT> +{ + typedef regex_constants::syntax_option_type flag_type; + typedef std::size_t size_type; + + regex_data(const ::boost::shared_ptr< + ::boost::regex_traits_wrapper<traits> >& t) + : m_ptraits(t), m_expression(0), m_expression_len(0) {} + regex_data() + : m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0) {} + + ::boost::shared_ptr< + ::boost::regex_traits_wrapper<traits> + > m_ptraits; // traits class instance + flag_type m_flags; // flags with which we were compiled + int m_status; // error code (0 implies OK). 
+ const charT* m_expression; // the original expression + std::ptrdiff_t m_expression_len; // the length of the original expression + size_type m_mark_count; // the number of marked sub-expressions + re_detail::re_syntax_base* m_first_state; // the first state of the machine + unsigned m_restart_type; // search optimisation type + unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match + unsigned int m_can_be_null; // whether we can match a null string + re_detail::raw_storage m_data; // the buffer in which our states are constructed + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + std::vector< + std::pair< + std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*. + bool m_has_recursions; // whether we have recursive expressions; +}; +// +// class basic_regex_implementation +// pimpl implementation class for basic_regex. +// +template <class charT, class traits> +class basic_regex_implementation + : public regex_data<charT, traits> +{ +public: + typedef regex_constants::syntax_option_type flag_type; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef typename traits::locale_type locale_type; + typedef const charT* const_iterator; + + basic_regex_implementation(){} + basic_regex_implementation(const ::boost::shared_ptr< + ::boost::regex_traits_wrapper<traits> >& t) + : regex_data<charT, traits>(t) {} + void assign(const charT* arg_first, + const charT* arg_last, + flag_type f) + { + regex_data<charT, traits>* pdat = this; + basic_regex_parser<charT, traits> parser(pdat); + parser.parse(arg_first, arg_last, f); + } + + locale_type BOOST_REGEX_CALL imbue(locale_type l) + { + return this->m_ptraits->imbue(l); + } + locale_type BOOST_REGEX_CALL getloc()const + { + return this->m_ptraits->getloc(); + } + std::basic_string<charT> BOOST_REGEX_CALL str()const + { + std::basic_string<charT> result; + if(this->m_status == 0) + 
result = std::basic_string<charT>(this->m_expression, this->m_expression_len); + return result; + } + const_iterator BOOST_REGEX_CALL expression()const + { + return this->m_expression; + } + std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const + { + if(n == 0) + throw std::out_of_range("0 is not a valid subexpression index."); + const std::pair<std::size_t, std::size_t>& pi = this->m_subs.at(n - 1); + std::pair<const_iterator, const_iterator> p(expression() + pi.first, expression() + pi.second); + return p; + } + // + // begin, end: + const_iterator BOOST_REGEX_CALL begin()const + { + return (!this->m_status ? 0 : this->m_expression); + } + const_iterator BOOST_REGEX_CALL end()const + { + return (!this->m_status ? 0 : this->m_expression + this->m_expression_len); + } + flag_type BOOST_REGEX_CALL flags()const + { + return this->m_flags; + } + size_type BOOST_REGEX_CALL size()const + { + return this->m_expression_len; + } + int BOOST_REGEX_CALL status()const + { + return this->m_status; + } + size_type BOOST_REGEX_CALL mark_count()const + { + return this->m_mark_count; + } + const re_detail::re_syntax_base* get_first_state()const + { + return this->m_first_state; + } + unsigned get_restart_type()const + { + return this->m_restart_type; + } + const unsigned char* get_map()const + { + return this->m_startmap; + } + const ::boost::regex_traits_wrapper<traits>& get_traits()const + { + return *(this->m_ptraits); + } + bool can_be_null()const + { + return this->m_can_be_null; + } + const regex_data<charT, traits>& get_data()const + { + basic_regex_implementation<charT, traits> const* p = this; + return *static_cast<const regex_data<charT, traits>*>(p); + } +}; + +} // namespace re_detail +// +// class basic_regex: +// represents the compiled +// regular expression: +// + +#ifdef BOOST_REGEX_NO_FWD +template <class charT, class traits = regex_traits<charT> > +#else +template <class charT, class traits > +#endif +class basic_regex : 
public regbase +{ +public: + // typedefs: + typedef std::size_t traits_size_type; + typedef typename traits::string_type traits_string_type; + typedef charT char_type; + typedef traits traits_type; + + typedef charT value_type; + typedef charT& reference; + typedef const charT& const_reference; + typedef const charT* const_iterator; + typedef const_iterator iterator; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef regex_constants::syntax_option_type flag_type; + // locale_type + // placeholder for actual locale type used by the + // traits class to localise *this. + typedef typename traits::locale_type locale_type; + +public: + explicit basic_regex(){} + explicit basic_regex(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f); + } + basic_regex(const charT* p, size_type len, flag_type f) + { + assign(p, len, f); + } + basic_regex(const basic_regex& that) + : m_pimpl(that.m_pimpl) {} + ~basic_regex(){} + basic_regex& BOOST_REGEX_CALL operator=(const basic_regex& that) + { + return assign(that); + } + basic_regex& BOOST_REGEX_CALL operator=(const charT* ptr) + { + return assign(ptr); + } + + // + // assign: + basic_regex& assign(const basic_regex& that) + { + m_pimpl = that.m_pimpl; + return *this; + } + basic_regex& assign(const charT* p, flag_type f = regex_constants::normal) + { + return assign(p, p + traits::length(p), f); + } + basic_regex& assign(const charT* p, size_type len, flag_type f) + { + return assign(p, p + len, f); + } +private: + basic_regex& do_assign(const charT* p1, + const charT* p2, + flag_type f); +public: + basic_regex& assign(const charT* p1, + const charT* p2, + flag_type f = regex_constants::normal) + { + return do_assign(p1, p2, f); + } +#if !defined(BOOST_NO_MEMBER_TEMPLATES) + + template <class ST, class SA> + unsigned int BOOST_REGEX_CALL set_expression(const 
std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal) + { + return set_expression(p.data(), p.data() + p.size(), f); + } + + template <class ST, class SA> + explicit basic_regex(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + + template <class InputIterator> + basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(a.size()) + assign(&*a.begin(), &*a.begin() + a.size(), f); + else + assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f); + } + + template <class ST, class SA> + basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p) + { + return assign(p.data(), p.data() + p.size(), regex_constants::normal); + } + + template <class string_traits, class A> + basic_regex& BOOST_REGEX_CALL assign( + const std::basic_string<charT, string_traits, A>& s, + flag_type f = regex_constants::normal) + { + return assign(s.data(), s.data() + s.size(), f); + } + + template <class InputIterator> + basic_regex& BOOST_REGEX_CALL assign(InputIterator arg_first, + InputIterator arg_last, + flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(a.size()) + { + const charT* p1 = &*a.begin(); + const charT* p2 = &*a.begin() + a.size(); + return assign(p1, p2, f); + } + return assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f); + } +#else + unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal) + { + return set_expression(p.data(), p.data() + p.size(), f); + } + + basic_regex(const std::basic_string<charT>& p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + + basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p) + { + return 
assign(p.data(), p.data() + p.size(), regex_constants::normal); + } + + basic_regex& BOOST_REGEX_CALL assign( + const std::basic_string<charT>& s, + flag_type f = regex_constants::normal) + { + return assign(s.data(), s.data() + s.size(), f); + } + +#endif + + // + // locale: + locale_type BOOST_REGEX_CALL imbue(locale_type l); + locale_type BOOST_REGEX_CALL getloc()const + { + return m_pimpl.get() ? m_pimpl->getloc() : locale_type(); + } + // + // getflags: + // retained for backwards compatibility only, "flags" + // is now the preferred name: + flag_type BOOST_REGEX_CALL getflags()const + { + return flags(); + } + flag_type BOOST_REGEX_CALL flags()const + { + return m_pimpl.get() ? m_pimpl->flags() : 0; + } + // + // str: + std::basic_string<charT> BOOST_REGEX_CALL str()const + { + return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>(); + } + // + // begin, end, subexpression: + std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const + { + if(!m_pimpl.get()) + throw std::logic_error("Can't access subexpressions in an invalid regex."); + return m_pimpl->subexpression(n); + } + const_iterator BOOST_REGEX_CALL begin()const + { + return (m_pimpl.get() ? m_pimpl->begin() : 0); + } + const_iterator BOOST_REGEX_CALL end()const + { + return (m_pimpl.get() ? m_pimpl->end() : 0); + } + // + // swap: + void BOOST_REGEX_CALL swap(basic_regex& that)throw() + { + m_pimpl.swap(that.m_pimpl); + } + // + // size: + size_type BOOST_REGEX_CALL size()const + { + return (m_pimpl.get() ? m_pimpl->size() : 0); + } + // + // max_size: + size_type BOOST_REGEX_CALL max_size()const + { + return UINT_MAX; + } + // + // empty: + bool BOOST_REGEX_CALL empty()const + { + return (m_pimpl.get() ? 0 != m_pimpl->status() : true); + } + + size_type BOOST_REGEX_CALL mark_count()const + { + return (m_pimpl.get() ? m_pimpl->mark_count() : 0); + } + + int status()const + { + return (m_pimpl.get() ? 
m_pimpl->status() : regex_constants::error_empty); + } + + int BOOST_REGEX_CALL compare(const basic_regex& that) const + { + if(m_pimpl.get() == that.m_pimpl.get()) + return 0; + if(!m_pimpl.get()) + return -1; + if(!that.m_pimpl.get()) + return 1; + if(status() != that.status()) + return status() - that.status(); + if(flags() != that.flags()) + return flags() - that.flags(); + return str().compare(that.str()); + } + bool BOOST_REGEX_CALL operator==(const basic_regex& e)const + { + return compare(e) == 0; + } + bool BOOST_REGEX_CALL operator != (const basic_regex& e)const + { + return compare(e) != 0; + } + bool BOOST_REGEX_CALL operator<(const basic_regex& e)const + { + return compare(e) < 0; + } + bool BOOST_REGEX_CALL operator>(const basic_regex& e)const + { + return compare(e) > 0; + } + bool BOOST_REGEX_CALL operator<=(const basic_regex& e)const + { + return compare(e) <= 0; + } + bool BOOST_REGEX_CALL operator>=(const basic_regex& e)const + { + return compare(e) >= 0; + } + + // + // The following are deprecated as public interfaces + // but are available for compatibility with earlier versions. + const charT* BOOST_REGEX_CALL expression()const + { + return (m_pimpl.get() && !m_pimpl->status() ? 
m_pimpl->expression() : 0); + } + unsigned int BOOST_REGEX_CALL set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f | regex_constants::no_except); + return status(); + } + unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f | regex_constants::no_except); + return status(); + } + unsigned int BOOST_REGEX_CALL error_code()const + { + return status(); + } + // + // private access methods: + // + const re_detail::re_syntax_base* get_first_state()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_first_state(); + } + unsigned get_restart_type()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_restart_type(); + } + const unsigned char* get_map()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_map(); + } + const ::boost::regex_traits_wrapper<traits>& get_traits()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_traits(); + } + bool can_be_null()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->can_be_null(); + } + const re_detail::regex_data<charT, traits>& get_data()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_data(); + } + boost::shared_ptr<re_detail::named_subexpressions<charT> > get_named_subs()const + { + return m_pimpl; + } + +private: + shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl; +}; + +// +// out of line members; +// these are the only members that mutate the basic_regex object, +// and are designed to provide the strong exception guarentee +// (in the event of a throw, the state of the object remains unchanged). 
+//
+// do_assign:
+// compiles [p1, p2) into a brand new implementation object and only then
+// swaps it into place, so *this is untouched if compilation throws.  The
+// traits instance of the current implementation (if any) is carried over.
+//
+template <class charT, class traits>
+basic_regex<charT, traits>& basic_regex<charT, traits>::do_assign(const charT* p1,
+                        const charT* p2,
+                        flag_type f)
+{
+   typedef re_detail::basic_regex_implementation<charT, traits> impl_type;
+
+   shared_ptr<impl_type> fresh(
+      m_pimpl.get() ? new impl_type(m_pimpl->m_ptraits) : new impl_type());
+   fresh->assign(p1, p2, f);
+   m_pimpl.swap(fresh);
+   return *this;
+}
+
+//
+// imbue:
+// installs a new, default constructed implementation object imbued with l
+// and returns whatever that object's imbue returned; the implementation
+// held so far is released by *this.
+//
+template <class charT, class traits>
+typename basic_regex<charT, traits>::locale_type BOOST_REGEX_CALL basic_regex<charT, traits>::imbue(locale_type l)
+{
+   typedef re_detail::basic_regex_implementation<charT, traits> impl_type;
+
+   shared_ptr<impl_type> fresh(new impl_type());
+   locale_type imbue_result = fresh->imbue(l);
+   m_pimpl.swap(fresh);
+   return imbue_result;
+}
+
+//
+// non-members:
+//
+// swap: forwards to the member swap.
+template <class charT, class traits>
+void swap(basic_regex<charT, traits>& e1, basic_regex<charT, traits>& e2)
+{
+   e1.swap(e2);
+}
+
+// stream insertion: writes e.str() to the stream.
+#ifndef BOOST_NO_STD_LOCALE
+template <class charT, class traits, class traits2>
+std::basic_ostream<charT, traits>&
+   operator << (std::basic_ostream<charT, traits>& os,
+                const basic_regex<charT, traits2>& e)
+{
+   return os << e.str();
+}
+#else
+template <class traits>
+std::ostream& operator << (std::ostream& os, const basic_regex<char, traits>& e)
+{
+   return os << e.str();
+}
+#endif
+
+//
+// class reg_expression:
+// this is provided for backwards compatibility only,
+// it is deprecated, do not use!
+//
+// Every constructor simply forwards to the matching basic_regex
+// constructor; the assignment operators forward to basic_regex::assign
+// and return *this as a reg_expression.
+//
+#ifdef BOOST_REGEX_NO_FWD
+template <class charT, class traits = regex_traits<charT> >
+#else
+template <class charT, class traits >
+#endif
+class reg_expression : public basic_regex<charT, traits>
+{
+public:
+   typedef typename basic_regex<charT, traits>::flag_type flag_type;
+   typedef typename basic_regex<charT, traits>::size_type size_type;
+   explicit reg_expression(){}
+   explicit reg_expression(const charT* p, flag_type f = regex_constants::normal)
+      : basic_regex<charT, traits>(p, f){}
+   reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
+      : basic_regex<charT, traits>(p1, p2, f){}
+   reg_expression(const charT* p, size_type len, flag_type f)
+      : basic_regex<charT, traits>(p, len, f){}
+   reg_expression(const reg_expression& that)
+      : basic_regex<charT, traits>(that) {}
+   ~reg_expression(){}
+   reg_expression& BOOST_REGEX_CALL operator=(const reg_expression& that)
+   {
+      // assign() returns basic_regex&, which cannot be returned as a
+      // reg_expression&; return *this instead, as the string overloads do.
+      this->assign(that);
+      return *this;
+   }
+
+#if !defined(BOOST_NO_MEMBER_TEMPLATES)
+   template <class ST, class SA>
+   explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
+   : basic_regex<charT, traits>(p, f)
+   {
+   }
+
+   template <class InputIterator>
+   reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
+   : basic_regex<charT, traits>(arg_first, arg_last, f)
+   {
+   }
+
+   template <class ST, class SA>
+   reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
+   {
+      this->assign(p);
+      return *this;
+   }
+#else
+   explicit reg_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
+      : basic_regex<charT, traits>(p, f)
+   {
+   }
+
+   reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
+   {
+      this->assign(p);
+      return *this;
+   }
+#endif
+
+};
+
+#ifdef BOOST_MSVC
+#pragma warning (pop)
+#endif
+
+} // namespace boost
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/ext/boost/regex/v4/basic_regex_creator.hpp b/ext/boost/regex/v4/basic_regex_creator.hpp new file mode 100644 index 0000000000..6f0050542d --- /dev/null +++ b/ext/boost/regex/v4/basic_regex_creator.hpp @@ -0,0 +1,1436 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_creator.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares template class basic_regex_creator which fills in + * the data members of a regex_data object. + */ + +#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP +#define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace boost{ + +namespace re_detail{ + +template <class charT> +struct digraph : public std::pair<charT, charT> +{ + digraph() : std::pair<charT, charT>(0, 0){} + digraph(charT c1) : std::pair<charT, charT>(c1, 0){} + digraph(charT c1, charT c2) : std::pair<charT, charT>(c1, c2) + {} +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) + digraph(const digraph<charT>& d) : std::pair<charT, charT>(d.first, d.second){} +#endif + template <class Seq> + digraph(const Seq& s) : std::pair<charT, charT>() + { + BOOST_ASSERT(s.size() <= 2); + BOOST_ASSERT(s.size()); + this->first = s[0]; + this->second = (s.size() > 1) ? 
s[1] : 0; + } +}; + +template <class charT, class traits> +class basic_char_set +{ +public: + typedef digraph<charT> digraph_type; + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type mask_type; + + basic_char_set() + { + m_negate = false; + m_has_digraphs = false; + m_classes = 0; + m_negated_classes = 0; + m_empty = true; + } + + void add_single(const digraph_type& s) + { + m_singles.insert(m_singles.end(), s); + if(s.second) + m_has_digraphs = true; + m_empty = false; + } + void add_range(const digraph_type& first, const digraph_type& end) + { + m_ranges.insert(m_ranges.end(), first); + m_ranges.insert(m_ranges.end(), end); + if(first.second) + { + m_has_digraphs = true; + add_single(first); + } + if(end.second) + { + m_has_digraphs = true; + add_single(end); + } + m_empty = false; + } + void add_class(mask_type m) + { + m_classes |= m; + m_empty = false; + } + void add_negated_class(mask_type m) + { + m_negated_classes |= m; + m_empty = false; + } + void add_equivalent(const digraph_type& s) + { + m_equivalents.insert(m_equivalents.end(), s); + if(s.second) + { + m_has_digraphs = true; + add_single(s); + } + m_empty = false; + } + void negate() + { + m_negate = true; + //m_empty = false; + } + + // + // accessor functions: + // + bool has_digraphs()const + { + return m_has_digraphs; + } + bool is_negated()const + { + return m_negate; + } + typedef typename std::vector<digraph_type>::const_iterator list_iterator; + list_iterator singles_begin()const + { + return m_singles.begin(); + } + list_iterator singles_end()const + { + return m_singles.end(); + } + list_iterator ranges_begin()const + { + return m_ranges.begin(); + } + list_iterator ranges_end()const + { + return m_ranges.end(); + } + list_iterator equivalents_begin()const + { + return m_equivalents.begin(); + } + list_iterator equivalents_end()const + { + return m_equivalents.end(); + } + mask_type classes()const + { + return m_classes; + } + mask_type 
negated_classes()const + { + return m_negated_classes; + } + bool empty()const + { + return m_empty; + } +private: + std::vector<digraph_type> m_singles; // a list of single characters to match + std::vector<digraph_type> m_ranges; // a list of end points of our ranges + bool m_negate; // true if the set is to be negated + bool m_has_digraphs; // true if we have digraphs present + mask_type m_classes; // character classes to match + mask_type m_negated_classes; // negated character classes to match + bool m_empty; // whether we've added anything yet + std::vector<digraph_type> m_equivalents; // a list of equivalence classes +}; + +template <class charT, class traits> +class basic_regex_creator +{ +public: + basic_regex_creator(regex_data<charT, traits>* data); + std::ptrdiff_t getoffset(void* addr) + { + return getoffset(addr, m_pdata->m_data.data()); + } + std::ptrdiff_t getoffset(const void* addr, const void* base) + { + return static_cast<const char*>(addr) - static_cast<const char*>(base); + } + re_syntax_base* getaddress(std::ptrdiff_t off) + { + return getaddress(off, m_pdata->m_data.data()); + } + re_syntax_base* getaddress(std::ptrdiff_t off, void* base) + { + return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off)); + } + void init(unsigned l_flags) + { + m_pdata->m_flags = l_flags; + m_icase = l_flags & regex_constants::icase; + } + regbase::flag_type flags() + { + return m_pdata->m_flags; + } + void flags(regbase::flag_type f) + { + m_pdata->m_flags = f; + if(m_icase != static_cast<bool>(f & regbase::icase)) + { + m_icase = static_cast<bool>(f & regbase::icase); + } + } + re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_literal* append_literal(charT c); + re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set); + re_syntax_base* append_set(const 
basic_char_set<charT, traits>& char_set, mpl::false_*); + re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*); + void finalize(const charT* p1, const charT* p2); +protected: + regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in + const ::boost::regex_traits_wrapper<traits>& + m_traits; // convenience reference to traits class + re_syntax_base* m_last_state; // the last state we added + bool m_icase; // true for case insensitive matches + unsigned m_repeater_id; // the state_id of the next repeater + bool m_has_backrefs; // true if there are actually any backrefs + unsigned m_backrefs; // bitmask of permitted backrefs + boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; + bool m_has_recursions; // set when we have recursive expresisons to fixup + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character + typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character + typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character + typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character +private: + basic_regex_creator& operator=(const basic_regex_creator&); + basic_regex_creator(const basic_regex_creator&); + + void fixup_pointers(re_syntax_base* state); + void fixup_recursions(re_syntax_base* state); + void create_startmaps(re_syntax_base* state); + int calculate_backstep(re_syntax_base* state); + void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); + unsigned get_restart_type(re_syntax_base* state); + void set_all_masks(unsigned char* bits, unsigned char); + bool 
is_bad_repeat(re_syntax_base* pt); + void set_bad_repeat(re_syntax_base* pt); + syntax_element_type get_repeat_type(re_syntax_base* state); + void probe_leading_repeat(re_syntax_base* state); +}; + +template <class charT, class traits> +basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data) + : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false) +{ + m_pdata->m_data.clear(); + m_pdata->m_status = ::boost::regex_constants::error_ok; + static const charT w = 'w'; + static const charT s = 's'; + static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', }; + static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', }; + static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', }; + m_word_mask = m_traits.lookup_classname(&w, &w +1); + m_mask_space = m_traits.lookup_classname(&s, &s +1); + m_lower_mask = m_traits.lookup_classname(l, l + 5); + m_upper_mask = m_traits.lookup_classname(u, u + 5); + m_alpha_mask = m_traits.lookup_classname(a, a + 5); + m_pdata->m_word_mask = m_word_mask; + BOOST_ASSERT(m_word_mask != 0); + BOOST_ASSERT(m_mask_space != 0); + BOOST_ASSERT(m_lower_mask != 0); + BOOST_ASSERT(m_upper_mask != 0); + BOOST_ASSERT(m_alpha_mask != 0); +} + +template <class charT, class traits> +re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s) +{ + // if the state is a backref then make a note of it: + if(t == syntax_element_backref) + this->m_has_backrefs = true; + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // now actually extent our data: + m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s)); + // fill in boilerplate options in the new state: + m_last_state->next.i = 0; + m_last_state->type = t; + 
return m_last_state; +} + +template <class charT, class traits> +re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s) +{ + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // remember the last state position: + std::ptrdiff_t off = getoffset(m_last_state) + s; + // now actually insert our data: + re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s)); + // fill in boilerplate options in the new state: + new_state->next.i = s; + new_state->type = t; + m_last_state = getaddress(off); + return new_state; +} + +template <class charT, class traits> +re_literal* basic_regex_creator<charT, traits>::append_literal(charT c) +{ + re_literal* result; + // start by seeing if we have an existing re_literal we can extend: + if((0 == m_last_state) || (m_last_state->type != syntax_element_literal)) + { + // no existing re_literal, create a new one: + result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + result->length = 1; + *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase); + } + else + { + // we have an existing re_literal, extend it: + std::ptrdiff_t off = getoffset(m_last_state); + m_pdata->m_data.extend(sizeof(charT)); + m_last_state = result = static_cast<re_literal*>(getaddress(off)); + charT* characters = static_cast<charT*>(static_cast<void*>(result+1)); + characters[result->length] = m_traits.translate(c, m_icase); + ++(result->length); + } + return result; +} + +template <class charT, class traits> +inline re_syntax_base* basic_regex_creator<charT, traits>::append_set( + const basic_char_set<charT, traits>& char_set) +{ + typedef mpl::bool_< (sizeof(charT) == 1) > truth_type; + return 
char_set.has_digraphs() + ? append_set(char_set, static_cast<mpl::false_*>(0)) + : append_set(char_set, static_cast<truth_type*>(0)); +} + +template <class charT, class traits> +re_syntax_base* basic_regex_creator<charT, traits>::append_set( + const basic_char_set<charT, traits>& char_set, mpl::false_*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set<charT, traits>::list_iterator item_iterator; + typedef typename traits::char_class_type mask_type; + + re_set_long<mask_type>* result = static_cast<re_set_long<mask_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<mask_type>))); + // + // fill in the basics: + // + result->csingles = static_cast<unsigned int>(::boost::re_detail::distance(char_set.singles_begin(), char_set.singles_end())); + result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2; + result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end())); + result->cclasses = char_set.classes(); + result->cnclasses = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust classes as needed: + if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask)) + result->cclasses |= m_alpha_mask; + if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask)) + result->cnclasses |= m_alpha_mask; + } + + result->isnot = char_set.is_negated(); + result->singleton = !char_set.has_digraphs(); + // + // remember where the state is for later: + // + std::ptrdiff_t offset = getoffset(result); + // + // now extend with all the singles: + // + item_iterator first, last; + first = char_set.singles_begin(); + last = char_set.singles_end(); + while(first != last) + { + charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 
3 : 2))); + p[0] = m_traits.translate(first->first, m_icase); + if(first->second) + { + p[1] = m_traits.translate(first->second, m_icase); + p[2] = 0; + } + else + p[1] = 0; + ++first; + } + // + // now extend with all the ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + digraph<charT> c1 = *first; + c1.first = this->m_traits.translate(c1.first, this->m_icase); + c1.second = this->m_traits.translate(c1.second, this->m_icase); + ++first; + digraph<charT> c2 = *first; + c2.first = this->m_traits.translate(c2.first, this->m_icase); + c2.second = this->m_traits.translate(c2.second, this->m_icase); + ++first; + string_type s1, s2; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: +#if BOOST_WORKAROUND(__GNUC__, < 3) + string_type in(3, charT(0)); + in[0] = c1.first; + in[1] = c1.second; + s1 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1)); + in[0] = c2.first; + in[1] = c2.second; + s2 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1)); +#else + charT a1[3] = { c1.first, c1.second, charT(0), }; + charT a2[3] = { c2.first, c2.second, charT(0), }; + s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1)); + s2 = this->m_traits.transform(a2, (a2[1] ? 
a2+2 : a2+1)); +#endif + if(s1.size() == 0) + s1 = string_type(1, charT(0)); + if(s2.size() == 0) + s2 = string_type(1, charT(0)); + } + else + { + if(c1.second) + { + s1.insert(s1.end(), c1.first); + s1.insert(s1.end(), c1.second); + } + else + s1 = string_type(1, c1.first); + if(c2.second) + { + s2.insert(s2.end(), c2.first); + s2.insert(s2.end(), c2.second); + } + else + s2.insert(s2.end(), c2.first); + } + if(s1 > s2) + { + // Oops error: + return 0; + } + charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) ); + re_detail::copy(s1.begin(), s1.end(), p); + p[s1.size()] = charT(0); + p += s1.size() + 1; + re_detail::copy(s2.begin(), s2.end(), p); + p[s2.size()] = charT(0); + } + // + // now process the equivalence classes: + // + first = char_set.equivalents_begin(); + last = char_set.equivalents_end(); + while(first != last) + { + string_type s; + if(first->second) + { +#if BOOST_WORKAROUND(__GNUC__, < 3) + string_type in(3, charT(0)); + in[0] = first->first; + in[1] = first->second; + s = m_traits.transform_primary(in.c_str(), in.c_str()+2); +#else + charT cs[3] = { first->first, first->second, charT(0), }; + s = m_traits.transform_primary(cs, cs+2); +#endif + } + else + s = m_traits.transform_primary(&first->first, &first->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) ); + re_detail::copy(s.begin(), s.end(), p); + p[s.size()] = charT(0); + ++first; + } + // + // finally reset the address of our last state: + // + m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset)); + return result; +} + +namespace{ + +template<class T> +inline bool char_less(T t1, T t2) +{ + return t1 < t2; +} +template<> +inline bool char_less<char>(char t1, char t2) +{ + return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2); +} +template<> +inline bool 
char_less<signed char>(signed char t1, signed char t2) +{ + return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2); +} +} + +template <class charT, class traits> +re_syntax_base* basic_regex_creator<charT, traits>::append_set( + const basic_char_set<charT, traits>& char_set, mpl::true_*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set<charT, traits>::list_iterator item_iterator; + + re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set))); + bool negate = char_set.is_negated(); + std::memset(result->_map, 0, sizeof(result->_map)); + // + // handle singles first: + // + item_iterator first, last; + first = char_set.singles_begin(); + last = char_set.singles_end(); + while(first != last) + { + for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i) + { + if(this->m_traits.translate(static_cast<charT>(i), this->m_icase) + == this->m_traits.translate(first->first, this->m_icase)) + result->_map[i] = true; + } + ++first; + } + // + // OK now handle ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + charT c1 = this->m_traits.translate(first->first, this->m_icase); + ++first; + charT c2 = this->m_traits.translate(first->first, this->m_icase); + ++first; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: + charT c3[2] = { c1, charT(0), }; + string_type s1 = this->m_traits.transform(c3, c3+1); + c3[0] = c2; + string_type s2 = this->m_traits.transform(c3, c3+1); + if(s1 > s2) + { + // Oops error: + return 0; + } + BOOST_ASSERT(c3[1] == charT(0)); + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + c3[0] = static_cast<charT>(i); + string_type s3 = this->m_traits.transform(c3, c3 +1); + if((s1 <= s3) && (s3 <= s2)) + result->_map[i] = true; + } + } + else + { + 
if(char_less<charT>(c2, c1)) + { + // Oops error: + return 0; + } + // everything in range matches: + std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1)); + } + } + // + // and now the classes: + // + typedef typename traits::char_class_type mask_type; + mask_type m = char_set.classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(this->m_traits.isctype(static_cast<charT>(i), m)) + result->_map[i] = true; + } + } + // + // and now the negated classes: + // + m = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(0 == this->m_traits.isctype(static_cast<charT>(i), m)) + result->_map[i] = true; + } + } + // + // now process the equivalence classes: + // + first = char_set.equivalents_begin(); + last = char_set.equivalents_end(); + while(first != last) + { + string_type s; + BOOST_ASSERT(static_cast<charT>(0) == first->second); + s = m_traits.transform_primary(&first->first, &first->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c[2] = { (static_cast<charT>(i)), charT(0), }; + string_type s2 = this->m_traits.transform_primary(c, c+1); + if(s == s2) + result->_map[i] = true; + } + ++first; + } + if(negate) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + result->_map[i] = !(result->_map[i]); + } + } + return result; +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2) +{ + // we've added all the 
states we need, now finish things off. + // start by adding a terminating state: + append_state(syntax_element_match); + // extend storage to store original expression: + std::ptrdiff_t len = p2 - p1; + m_pdata->m_expression_len = len; + charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1)))); + m_pdata->m_expression = ps; + re_detail::copy(p1, p2, ps); + ps[p2 - p1] = 0; + // fill in our other data... + // successful parsing implies a zero status: + m_pdata->m_status = 0; + // get the first state of the machine: + m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data()); + // fixup pointers in the machine: + fixup_pointers(m_pdata->m_first_state); + if(m_has_recursions) + { + m_pdata->m_has_recursions = true; + fixup_recursions(m_pdata->m_first_state); + } + else + m_pdata->m_has_recursions = false; + // create nested startmaps: + create_startmaps(m_pdata->m_first_state); + // create main startmap: + std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap)); + m_pdata->m_can_be_null = 0; + + m_bad_repeats = 0; + create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); + // get the restart type: + m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); + // optimise a leading repeat if there is one: + probe_leading_repeat(m_pdata->m_first_state); +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state) +{ + while(state) + { + switch(state->type) + { + case syntax_element_recurse: + m_has_recursions = true; + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + break; + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // set the state_id of this repeat: + static_cast<re_repeat*>(state)->state_id = m_repeater_id++; + // fall 
through: + case syntax_element_alt: + std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map)); + static_cast<re_alt*>(state)->can_be_null = 0; + // fall through: + case syntax_element_jump: + static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state); + // fall through again: + default: + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + } + state = state->next.p; + } +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state) +{ + re_syntax_base* base = state; + while(state) + { + switch(state->type) + { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int id = static_cast<const re_brace*>(state)->index; + if(id < 0) + { + id = -id-1; + if(id >= 10000) + { + id = m_pdata->get_id(id); + if(id <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; + case syntax_element_recurse: + { + bool ok = false; + re_syntax_base* p = base; + int id = static_cast<re_jump*>(state)->alt.i; + if(id > 10000) + id = m_pdata->get_id(id); + while(p) + { + if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == id)) + { + static_cast<re_jump*>(state)->alt.p = p; + ok = true; + break; + } + p = p->next.p; + } + if(!ok) + { + // recursion to 
sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + default: + break; + } + state = state->next.p; + } +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state) +{ + // non-recursive implementation: + // create the last map in the machine first, so that earlier maps + // can make use of the result... + // + // This was originally a recursive implementation, but that caused stack + // overflows with complex expressions on small stacks (think COM+). 
+ + // start by saving the case setting: + bool l_icase = m_icase; + std::vector<std::pair<bool, re_syntax_base*> > v; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + // we need to track case changes here: + m_icase = static_cast<re_case*>(state)->icase; + state = state->next.p; + continue; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // just push the state onto our stack for now: + v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state)); + state = state->next.p; + break; + case syntax_element_backstep: + // we need to calculate how big the backstep is: + static_cast<re_brace*>(state)->index + = this->calculate_backstep(state->next.p); + if(static_cast<re_brace*>(state)->index < 0) + { + // Oops error: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + // fall through: + default: + state = state->next.p; + } + } + // now work through our list, building all the maps as we go: + while(v.size()) + { + const std::pair<bool, re_syntax_base*>& p = v.back(); + m_icase = p.first; + state = p.second; + v.pop_back(); + + // Build maps: + m_bad_repeats = 0; + create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take); + m_bad_repeats = 0; + create_startmap(static_cast<re_alt*>(state)->alt.p, 
static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + } + // restore case sensitivity: + m_icase = l_icase; +} + +template <class charT, class traits> +int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state) +{ + typedef typename traits::char_class_type mask_type; + int result = 0; + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + if((static_cast<re_brace*>(state)->index == -1) + || (static_cast<re_brace*>(state)->index == -2)) + { + state = static_cast<re_jump*>(state->next.p)->alt.p->next.p; + continue; + } + else if(static_cast<re_brace*>(state)->index == -3) + { + state = state->next.p->next.p; + continue; + } + break; + case syntax_element_endmark: + if((static_cast<re_brace*>(state)->index == -1) + || (static_cast<re_brace*>(state)->index == -2)) + return result; + break; + case syntax_element_literal: + result += static_cast<re_literal*>(state)->length; + break; + case syntax_element_wild: + case syntax_element_set: + result += 1; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_backref: + case syntax_element_rep: + case syntax_element_combining: + case syntax_element_long_set_rep: + case syntax_element_backstep: + { + re_repeat* rep = static_cast<re_repeat *>(state); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + if((state->type == syntax_element_dot_rep) + || (state->type == syntax_element_char_rep) + || (state->type == syntax_element_short_set_rep)) + { + if(rep->max != rep->min) + return -1; + result += static_cast<int>(rep->min); + state = rep->alt.p; + continue; + } + else if((state->type == syntax_element_long_set_rep)) + { + BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + 
if(static_cast<re_set_long<mask_type>*>(rep->next.p)->singleton == 0) + return -1; + if(rep->max != rep->min) + return -1; + result += static_cast<int>(rep->min); + state = rep->alt.p; + continue; + } + } + return -1; + case syntax_element_long_set: + if(static_cast<re_set_long<mask_type>*>(state)->singleton == 0) + return -1; + result += 1; + break; + case syntax_element_jump: + state = static_cast<re_jump*>(state)->alt.p; + continue; + default: + break; + } + state = state->next.p; + } + return -1; +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) +{ + int not_last_jump = 1; + + // track case sensitivity: + bool l_icase = m_icase; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + l_icase = static_cast<re_case*>(state)->icase; + state = state->next.p; + break; + case syntax_element_literal: + { + // don't set anything in *pnull, set each element in l_map + // that could match the first character in the literal: + if(l_map) + { + l_map[0] |= mask_init; + charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1)); + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char) + l_map[i] |= mask; + } + } + return; + } + case syntax_element_end_line: + { + // next character must be a line separator (if there is one): + if(l_map) + { + l_map[0] |= mask_init; + l_map['\n'] |= mask; + l_map['\r'] |= mask; + l_map['\f'] |= mask; + l_map[0x85] |= mask; + } + // now figure out if we can match a NULL string at this point: + if(pnull) + create_startmap(state->next.p, 0, pnull, mask); + return; + } + case syntax_element_recurse: + case syntax_element_backref: + // can be null, and any character can match: + if(pnull) + *pnull |= mask; + // fall through: + case syntax_element_wild: + { + // can't be null, any 
character can match: + set_all_masks(l_map, mask); + return; + } + case syntax_element_match: + { + // must be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_word_start: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(!m_traits.isctype(static_cast<charT>(i), m_word_mask)) + l_map[i] &= static_cast<unsigned char>(~mask); + } + } + return; + } + case syntax_element_word_end: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.isctype(static_cast<charT>(i), m_word_mask)) + l_map[i] &= static_cast<unsigned char>(~mask); + } + } + return; + } + case syntax_element_buffer_end: + { + // we *must be null* : + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_long_set: + if(l_map) + { + typedef typename traits::char_class_type mask_type; + if(static_cast<re_set_long<mask_type>*>(state)->singleton) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c = static_cast<charT>(i); + if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata, m_icase)) + l_map[i] |= mask; + } + } + else + set_all_masks(l_map, mask); + } + return; + case syntax_element_set: + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(static_cast<re_set*>(state)->_map[ + static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))]) + l_map[i] |= mask; + } + } + return; + case syntax_element_jump: + // take the jump: + state = static_cast<re_alt*>(state)->alt.p; + not_last_jump = -1; + break; + case syntax_element_alt: + case 
syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + re_alt* rep = static_cast<re_alt*>(state); + if(rep->_map[0] & mask_init) + { + if(l_map) + { + // copy previous results: + l_map[0] |= mask_init; + for(unsigned int i = 0; i <= UCHAR_MAX; ++i) + { + if(rep->_map[i] & mask_any) + l_map[i] |= mask; + } + } + if(pnull) + { + if(rep->can_be_null & mask_any) + *pnull |= mask; + } + } + else + { + // we haven't created a startmap for this alternative yet + // so take the union of the two options: + if(is_bad_repeat(state)) + { + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + set_bad_repeat(state); + create_startmap(state->next.p, l_map, pnull, mask); + if((state->type == syntax_element_alt) + || (static_cast<re_repeat*>(state)->min == 0) + || (not_last_jump == 0)) + create_startmap(rep->alt.p, l_map, pnull, mask); + } + } + return; + case syntax_element_soft_buffer_end: + // match newline or null: + if(l_map) + { + l_map[0] |= mask_init; + l_map['\n'] |= mask; + l_map['\r'] |= mask; + } + if(pnull) + *pnull |= mask; + return; + case syntax_element_endmark: + // need to handle independent subs as a special case: + if(static_cast<re_brace*>(state)->index < 0) + { + // can be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + else + { + state = state->next.p; + break; + } + + case syntax_element_startmark: + // need to handle independent subs as a special case: + if(static_cast<re_brace*>(state)->index == -3) + { + state = state->next.p->next.p; + break; + } + // otherwise fall through: + default: + state = state->next.p; + } + ++not_last_jump; + } +} + +template <class charT, class traits> +unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state) +{ + // + // find out how the machine starts, so we can optimise the search: + // + while(state) + { + 
switch(state->type) + { + case syntax_element_startmark: + case syntax_element_endmark: + state = state->next.p; + continue; + case syntax_element_start_line: + return regbase::restart_line; + case syntax_element_word_start: + return regbase::restart_word; + case syntax_element_buffer_start: + return regbase::restart_buf; + case syntax_element_restart_continue: + return regbase::restart_continue; + default: + state = 0; + continue; + } + } + return regbase::restart_any; +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask) +{ + // + // set mask in all of bits elements, + // if bits[0] has mask_init not set then we can + // optimise this to a call to memset: + // + if(bits) + { + if(bits[0] == 0) + (std::memset)(bits, mask, 1u << CHAR_BIT); + else + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + bits[i] |= mask; + } + bits[0] |= mask_init; + } +} + +template <class charT, class traits> +bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast<re_repeat*>(pt)->state_id; + if(state_id > sizeof(m_bad_repeats) * CHAR_BIT) + return true; // run out of bits, assume we can't traverse this one. 
+ static const boost::uintmax_t one = 1uL; + return m_bad_repeats & (one << state_id); + } + default: + return false; + } +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast<re_repeat*>(pt)->state_id; + static const boost::uintmax_t one = 1uL; + if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT) + m_bad_repeats |= (one << state_id); + } + default: + break; + } +} + +template <class charT, class traits> +syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state) +{ + typedef typename traits::char_class_type mask_type; + if(state->type == syntax_element_rep) + { + // check to see if we are repeating a single state: + if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p) + { + switch(state->next.p->type) + { + case re_detail::syntax_element_wild: + return re_detail::syntax_element_dot_rep; + case re_detail::syntax_element_literal: + return re_detail::syntax_element_char_rep; + case re_detail::syntax_element_set: + return re_detail::syntax_element_short_set_rep; + case re_detail::syntax_element_long_set: + if(static_cast<re_detail::re_set_long<mask_type>*>(state->next.p)->singleton) + return re_detail::syntax_element_long_set_rep; + break; + default: + break; + } + } + } + return state->type; +} + +template <class charT, class traits> +void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state) +{ + // enumerate our states, and see if we have a leading repeat + // for which failed search restarts can be optimised; + do + { + switch(state->type) + { + case syntax_element_startmark: + if(static_cast<re_brace*>(state)->index >= 0) + { + state = state->next.p; + continue; + } + 
if((static_cast<re_brace*>(state)->index == -1) + || (static_cast<re_brace*>(state)->index == -2)) + { + // skip past the zero width assertion: + state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p; + continue; + } + if(static_cast<re_brace*>(state)->index == -3) + { + // Have to skip the leading jump state: + state = state->next.p->next.p; + continue; + } + return; + case syntax_element_endmark: + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_restart_continue: + state = state->next.p; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + if(this->m_has_backrefs == 0) + static_cast<re_repeat*>(state)->leading = true; + // fall through: + default: + return; + } + }while(state); +} + + +} // namespace re_detail + +} // namespace boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/ext/boost/regex/v4/basic_regex_parser.hpp b/ext/boost/regex/v4/basic_regex_parser.hpp new file mode 100644 index 0000000000..7d2f5543e2 --- /dev/null +++ b/ext/boost/regex/v4/basic_regex_parser.hpp @@ -0,0 +1,2571 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. 
+ * FILE basic_regex_parser.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares template class basic_regex_parser. + */ + +#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP +#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace boost{ +namespace re_detail{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4244 4800) +#endif + +template <class charT, class traits> +class basic_regex_parser : public basic_regex_creator<charT, traits> +{ +public: + basic_regex_parser(regex_data<charT, traits>* data); + void parse(const charT* p1, const charT* p2, unsigned flags); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position); + + bool parse_all(); + bool parse_basic(); + bool parse_extended(); + bool parse_literal(); + bool parse_open_paren(); + bool parse_basic_escape(); + bool parse_extended_escape(); + bool parse_match_any(); + bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)()); + bool parse_repeat_range(bool isbasic); + bool parse_alt(); + bool parse_set(); + bool parse_backref(); + void parse_set_literal(basic_char_set<charT, traits>& char_set); + bool parse_inner_set(basic_char_set<charT, traits>& char_set); + bool parse_QE(); + bool parse_perl_extension(); + bool add_emacs_code(bool negate); + bool unwind_alts(std::ptrdiff_t last_paren_start); + digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set); + charT unescape_character(); + regex_constants::syntax_option_type parse_options(); + +private: + typedef bool (basic_regex_parser::*parser_proc_type)(); + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type char_class_type; + parser_proc_type m_parser_proc; // the main parser to use + const charT* m_base; // 
the start of the string being parsed + const charT* m_end; // the end of the string being parsed + const charT* m_position; // our current parser position + unsigned m_mark_count; // how many sub-expressions we have + int m_mark_reset; // used to indicate that we're inside a (?|...) block. + unsigned m_max_mark; // largest mark count seen inside a (?|...) block. + std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). + std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative + bool m_has_case_change; // true if somewhere in the current block the case has changed +#if defined(BOOST_MSVC) && defined(_M_IX86) + // This is an ugly warning suppression workaround (for warnings *inside* std::vector + // that can not otherwise be suppressed)... + BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*)); + std::vector<long> m_alt_jumps; // list of alternative in the current scope. +#else + std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope. 
+#endif + + basic_regex_parser& operator=(const basic_regex_parser&); + basic_regex_parser(const basic_regex_parser&); +}; + +template <class charT, class traits> +basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data) + : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false) +{ +} + +template <class charT, class traits> +void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags) +{ + // pass l_flags on to base class: + this->init(l_flags); + // set up pointers: + m_position = m_base = p1; + m_end = p2; + // empty strings are errors: + if((p1 == p2) && + ( + ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group) + || (l_flags & regbase::no_empty_expressions) + ) + ) + { + fail(regex_constants::error_empty, 0); + return; + } + // select which parser to use: + switch(l_flags & regbase::main_option_type) + { + case regbase::perl_syntax_group: + { + m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended; + // + // Add a leading paren with index zero to give recursions a target: + // + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + br->index = 0; + br->icase = this->flags() & regbase::icase; + break; + } + case regbase::basic_syntax_group: + m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic; + break; + case regbase::literal: + m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal; + break; + } + + // parse all our characters: + bool result = parse_all(); + // + // Unwind our alternatives: + // + unwind_alts(-1); + // reset l_flags as a global scope (?imsx) may have altered them: + this->flags(l_flags); + // if we haven't gobbled up all the characters then we must + // have had an unexpected ')' : + if(!result) + { + fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, 
m_position)); + return; + } + // if an error has been set then give up now: + if(this->m_pdata->m_status) + return; + // fill in our sub-expression count: + this->m_pdata->m_mark_count = 1 + m_mark_count; + this->finalize(p1, p2); +} + +template <class charT, class traits> +void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position) +{ + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = error_code; + m_position = m_end; // don't bother parsing anything else + // get the error message: + std::string message = this->m_pdata->m_ptraits->error_string(error_code); + // and raise the exception, this will do nothing if exceptions are disabled: +#ifndef BOOST_NO_EXCEPTIONS + if(0 == (this->flags() & regex_constants::no_except)) + { + boost::regex_error e(message, error_code, position); + e.raise(); + } +#else + (void)position; // suppress warnings. +#endif +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_all() +{ + bool result = true; + while(result && (m_position != m_end)) + { + result = (this->*m_parser_proc)(); + } + return result; +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4702) +#endif +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_basic() +{ + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_escape: + return parse_basic_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state(syntax_element_start_line); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state(syntax_element_end_line); + break; + case regex_constants::syntax_star: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(); + } + case 
regex_constants::syntax_plus: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(1); + } + case regex_constants::syntax_question: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(0, 1); + } + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + default: + return parse_literal(); + } + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_extended() +{ + bool result = true; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_escape: + return parse_extended_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line)); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? 
syntax_element_buffer_end : syntax_element_end_line)); + break; + case regex_constants::syntax_star: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0); + return false; + } + ++m_position; + return parse_repeat(); + case regex_constants::syntax_question: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0); + return false; + } + ++m_position; + return parse_repeat(0,1); + case regex_constants::syntax_plus: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0); + return false; + } + ++m_position; + return parse_repeat(1); + case regex_constants::syntax_open_brace: + ++m_position; + return parse_repeat_range(false); + case regex_constants::syntax_close_brace: + fail(regex_constants::error_brace, this->m_position - this->m_end); + return false; + case regex_constants::syntax_or: + return parse_alt(); + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + case regex_constants::syntax_hash: + // + // If we have a mod_x flag set, then skip until + // we get to a newline character: + // + if((this->flags() + & (regbase::no_perl_ex|regbase::mod_x)) + == regbase::mod_x) + { + while((m_position != m_end) && !is_separator(*m_position++)){} + return true; + } + // Otherwise fall through: + default: + result = parse_literal(); + break; + } + return result; +} +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_literal() +{ + // append this as a literal provided it's not a space character + // or the perl option regbase::mod_x is not set: + if( + ((this->flags() + & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) + != regbase::mod_x) + || !this->m_traits.isctype(*m_position, this->m_mask_space)) + this->append_literal(*m_position); + ++m_position; + 
return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_open_paren() +{ + // + // skip the '(' and error check: + // + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + // + // begin by checking for a perl-style (?...) extension: + // + if( + ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) + || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + ) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) + return parse_perl_extension(); + } + // + // update our mark count, and append the required state: + // + unsigned markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; +#ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0)); +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0)); +#endif + } + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + // + // back up the current flags in case we have a nested (?imsx) group: + // + regex_constants::syntax_option_type opts = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; // no changes to this scope as yet... + // + // Back up branch reset data in case we have a nested (?|...) 
+ // + int mark_reset = m_mark_reset; + m_mark_reset = -1; + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind pushed alternatives: + // + if(0 == unwind_alts(last_paren_start)) + return false; + // + // restore flags: + // + if(m_has_case_change) + { + // the case has changed in one or more of the alternatives + // within the scoped (...) block: we have to add a state + // to reset the case sensitivity: + static_cast<re_case*>( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + this->flags(opts); + m_has_case_change = old_case_change; + // + // restore branch reset: + // + m_mark_reset = mark_reset; + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end)); + return false; + } + BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); +#ifndef BOOST_NO_STD_DISTANCE + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); +#else + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base); +#endif + ++m_position; + // + // append closing parenthesis state: + // + pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // allow backrefs to this mark: + // + if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT)) + this->m_backrefs |= 1u << (markid - 1); + + return true; +} + +template <class charT, class traits> +bool 
basic_regex_parser<charT, traits>::parse_basic_escape() +{ + ++m_position; + bool result = true; + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_plus: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(1); + } + else + return parse_literal(); + case regex_constants::syntax_question: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(0, 1); + } + else + return parse_literal(); + case regex_constants::syntax_open_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + ++m_position; + return parse_repeat_range(true); + case regex_constants::syntax_close_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + fail(regex_constants::error_brace, this->m_position - this->m_base); + return false; + case regex_constants::syntax_or: + if(this->flags() & regbase::bk_vbar) + return parse_alt(); + else + result = parse_literal(); + break; + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_start_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_end_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_end); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_boundary); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_not_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_within_word); + } + 
else + result = parse_literal(); + break; + case regex_constants::escape_type_left_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_right_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_end); + } + else + result = parse_literal(); + break; + default: + if(this->flags() & regbase::emacs_ex) + { + bool negate = true; + switch(*m_position) + { + case 'w': + negate = false; + // fall through: + case 'W': + { + basic_char_set<charT, traits> char_set; + if(negate) + char_set.negate(); + char_set.add_class(this->m_word_mask); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + case 's': + negate = false; + // fall through: + case 'S': + return add_emacs_code(negate); + case 'c': + case 'C': + // not supported yet: + fail(regex_constants::error_escape, m_position - m_base); + return false; + default: + break; + } + } + result = parse_literal(); + break; + } + return result; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_extended_escape() +{ + ++m_position; + bool negate = false; // in case this is a character class escape: \w \d etc + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_not_class: + negate = true; + // fall through: + case regex_constants::escape_type_class: + { +escape_type_class_jump: + typedef typename traits::char_class_type mask_type; + mask_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + basic_char_set<charT, traits> char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; 
+ return true; + } + // + // not a class, just a regular unknown escape: + // + this->append_literal(unescape_character()); + break; + } + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_left_word: + ++m_position; + this->append_state(syntax_element_word_start); + break; + case regex_constants::escape_type_right_word: + ++m_position; + this->append_state(syntax_element_word_end); + break; + case regex_constants::escape_type_start_buffer: + ++m_position; + this->append_state(syntax_element_buffer_start); + break; + case regex_constants::escape_type_end_buffer: + ++m_position; + this->append_state(syntax_element_buffer_end); + break; + case regex_constants::escape_type_word_assert: + ++m_position; + this->append_state(syntax_element_word_boundary); + break; + case regex_constants::escape_type_not_word_assert: + ++m_position; + this->append_state(syntax_element_within_word); + break; + case regex_constants::escape_type_Z: + ++m_position; + this->append_state(syntax_element_soft_buffer_end); + break; + case regex_constants::escape_type_Q: + return parse_QE(); + case regex_constants::escape_type_C: + return parse_match_any(); + case regex_constants::escape_type_X: + ++m_position; + this->append_state(syntax_element_combining); + break; + case regex_constants::escape_type_G: + ++m_position; + this->append_state(syntax_element_restart_continue); + break; + case regex_constants::escape_type_not_property: + negate = true; + // fall through: + case regex_constants::escape_type_property: + { + ++m_position; + char_class_type m; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // maybe have \p{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != 
regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + m = this->m_traits.lookup_classname(++base, m_position++); + } + else + { + m = this->m_traits.lookup_classname(m_position, m_position+1); + ++m_position; + } + if(m != 0) + { + basic_char_set<charT, traits> char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + return true; + } + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + case regex_constants::escape_type_reset_start_mark: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = -5; + pb->icase = this->flags() & regbase::icase; + this->m_pdata->m_data.align(); + ++m_position; + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_line_ending: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + const charT* e = get_escape_R_string<charT>(); + const charT* old_position = m_position; + const charT* old_end = m_end; + const charT* old_base = m_base; + m_position = e; + m_base = e; + m_end = e + traits::length(e); + bool r = parse_all(); + m_position = ++old_position; + m_end = old_end; + m_base = old_base; + return r; + } + goto escape_type_class_jump; + case regex_constants::escape_type_extended_backref: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + bool have_brace = false; + bool negative = false; + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // maybe have \g{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + if(++m_position == 
m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + have_brace = true; + } + negative = (*m_position == static_cast<charT>('-')); + if((negative) && (++m_position == m_end)) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + const charT* pc = m_position; + int i = this->m_traits.toi(pc, m_end, 10); + if(i < 0) + { + // Check for a named capture: + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + i = this->m_pdata->get_id(base, m_position); + pc = m_position; + } + if(negative) + i = 1 + m_mark_count - i; + if((i > 0) && (this->m_backrefs & (1u << (i-1)))) + { + m_position = pc; + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = i; + pb->icase = this->flags() & regbase::icase; + } + else + { + fail(regex_constants::error_backref, m_position - m_end); + return false; + } + m_position = pc; + if(have_brace) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + ++m_position; + } + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_control_v: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + goto escape_type_class_jump; + // fallthrough: + default: + this->append_literal(unescape_character()); + break; + } + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_match_any() +{ + // + // we have a '.' that can match any character: + // + ++m_position; + static_cast<re_dot*>( + this->append_state(syntax_element_wild, sizeof(re_dot)) + )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s + ? 
re_detail::force_not_newline + : this->flags() & regbase::mod_s ? + re_detail::force_newline : re_detail::dont_care); + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high) +{ + bool greedy = true; + bool pocessive = false; + std::size_t insert_point; + // + // when we get to here we may have a non-greedy ? mark still to come: + // + if((m_position != m_end) + && ( + (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex))) + ) + ) + { + // OK we have a perl or emacs regex, check for a '?': + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) + { + greedy = false; + ++m_position; + } + // for perl regexes only check for pocessive ++ repeats. + if((0 == (this->flags() & regbase::main_option_type)) + && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) + { + pocessive = true; + ++m_position; + } + } + if(0 == this->m_last_state) + { + fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position)); + return false; + } + if(this->m_last_state->type == syntax_element_endmark) + { + // insert a repeat before the '(' matching the last ')': + insert_point = this->m_paren_start; + } + else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1)) + { + // the last state was a literal with more than one character, split it in two: + re_literal* lit = static_cast<re_literal*>(this->m_last_state); + charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1]; + --(lit->length); + // now append new state: + lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + lit->length = 1; + (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c; + 
insert_point = this->getoffset(this->m_last_state); + } + else + { + // repeat the last state whatever it was, need to add some error checking here: + switch(this->m_last_state->type) + { + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_alt: + case syntax_element_soft_buffer_end: + case syntax_element_restart_continue: + case syntax_element_jump: + case syntax_element_startmark: + case syntax_element_backstep: + // can't legally repeat any of the above: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + default: + // do nothing... + break; + } + insert_point = this->getoffset(this->m_last_state); + } + // + // OK we now know what to repeat, so insert the repeat around it: + // + re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size)); + rep->min = low; + rep->max = high; + rep->greedy = greedy; + rep->leading = false; + // store our repeater position for later: + std::ptrdiff_t rep_off = this->getoffset(rep); + // and append a back jump to the repeat: + re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump))); + jmp->alt.i = rep_off - this->getoffset(jmp); + this->m_pdata->m_data.align(); + // now fill in the alt jump for the repeat: + rep = static_cast<re_repeat*>(this->getaddress(rep_off)); + rep->alt.i = this->m_pdata->m_data.size() - rep_off; + // + // If the repeat is pocessive then bracket the repeat with a (?>...) 
+ // independent sub-expression construct: + // + if(pocessive) + { + re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + re_jump* jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + } + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) +{ + // + // parse a repeat-range: + // + std::size_t min, max; + int v; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + // fail if at end: + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base); + return false; + } + // get min: + v = this->m_traits.toi(m_position, m_end, 10); + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(v < 0) + { + fail(regex_constants::error_badbrace, this->m_position - this->m_base); + return false; + } + else if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base); + return false; + } + min = v; + // see if we have a comma: + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) + { + // move on and error check: + ++m_position; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base); + return false; + } 
+ // get the value if any: + v = this->m_traits.toi(m_position, m_end, 10); + max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)(); + } + else + { + // no comma, max = min: + max = min; + } + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + // OK now check trailing }: + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base); + return false; + } + if(isbasic) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) + { + ++m_position; + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base); + return false; + } + } + else + { + fail(regex_constants::error_badbrace, this->m_position - this->m_base); + return false; + } + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) + ++m_position; + else + { + fail(regex_constants::error_badbrace, this->m_position - this->m_base); + return false; + } + // + // finally go and add the repeat, unless error: + // + if(min > max) + { + fail(regex_constants::error_badbrace, this->m_position - this->m_base); + return false; + } + return parse_repeat(min, max); +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_alt() +{ + // + // error check: if there have been no previous states, + // or if the last state was a '(' then error: + // + if( + ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base); + return false; + } + // + // Reset mark count if required: + // + if(m_max_mark < m_mark_count) + m_max_mark = m_mark_count; + if(m_mark_reset >= 0) + m_mark_count = m_mark_reset; + + 
++m_position; + // + // we need to append a trailing jump: + // + re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump)); + std::ptrdiff_t jump_offset = this->getoffset(pj); + // + // now insert the alternative: + // + re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size)); + jump_offset += re_alt_size; + this->m_pdata->m_data.align(); + palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt); + // + // update m_alt_insert_point so that the next alternate gets + // inserted at the start of the second of the two we've just created: + // + this->m_alt_insert_point = this->m_pdata->m_data.size(); + // + // the start of this alternative must have a case changes state + // if the current block has messed around with case changes: + // + if(m_has_case_change) + { + static_cast<re_case*>( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->m_icase; + } + // + // push the alternative onto our stack, a recursive + // implementation here is easier to understand (and faster + // as it happens), but causes all kinds of stack overflow problems + // on programs with small stacks (COM+). 
+ // + m_alt_jumps.push_back(jump_offset); + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_set() +{ + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + basic_char_set<charT, traits> char_set; + + const charT* base = m_position; // where the '[' was + const charT* item_base = m_position; // where the '[' or '^' was + + while(m_position != m_end) + { + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_caret: + if(m_position == base) + { + char_set.negate(); + ++m_position; + item_base = m_position; + } + else + parse_set_literal(char_set); + break; + case regex_constants::syntax_close_set: + if(m_position == item_base) + { + parse_set_literal(char_set); + break; + } + else + { + ++m_position; + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_range, m_position - m_base); + return false; + } + } + return true; + case regex_constants::syntax_open_set: + if(parse_inner_set(char_set)) + break; + return true; + case regex_constants::syntax_escape: + { + // + // look ahead and see if this is a character class shortcut + // \d \w \s etc... 
+ // + ++m_position; + if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_class) + { + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_class(m); + ++m_position; + break; + } + } + else if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_not_class) + { + // negated character class: + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_negated_class(m); + ++m_position; + break; + } + } + // not a character class, just a regular escape: + --m_position; + parse_set_literal(char_set); + break; + } + default: + parse_set_literal(char_set); + break; + } + } + return m_position != m_end; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set) +{ + // + // we have either a character class [:name:] + // a collating element [.name.] 
+ // or an equivalence class [=name=] + // + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dot: + // + // a collating element is treated as a literal: + // + --m_position; + parse_set_literal(char_set); + return true; + case regex_constants::syntax_colon: + { + // check that character classes are actually enabled: + if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) + == (regbase::basic_syntax_group | regbase::no_char_classes)) + { + --m_position; + parse_set_literal(char_set); + return true; + } + // skip the ':' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + // + // check for negated class: + // + bool negated = false; + if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret) + { + ++name_first; + negated = true; + } + typedef typename traits::char_class_type mask_type; + mask_type m = this->m_traits.lookup_classname(name_first, name_last); + if(m == 0) + { + if(char_set.empty() && (name_last - name_first == 1)) + { + // maybe a special case: + ++m_position; + if( (m_position != m_end) + && (this->m_traits.syntax_type(*m_position) + == 
regex_constants::syntax_close_set)) + { + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_left_word) + { + ++m_position; + this->append_state(syntax_element_word_start); + return false; + } + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_right_word) + { + ++m_position; + this->append_state(syntax_element_word_end); + return false; + } + } + } + fail(regex_constants::error_ctype, name_first - m_base); + return false; + } + if(negated == false) + char_set.add_class(m); + else + char_set.add_negated_class(m); + ++m_position; + break; + } + case regex_constants::syntax_equal: + { + // skip the '=' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching '=]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base); + return false; + } + string_type m = this->m_traits.lookup_collatename(name_first, name_last); + if((0 == m.size()) || (m.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return false; + } + digraph<charT> d; + d.first = m[0]; + if(m.size() > 1) + d.second = m[1]; + else + d.second = 0; + char_set.add_equivalent(d); + ++m_position; + break; + } + default: + --m_position; + parse_set_literal(char_set); + break; + } + return true; +} + +template <class charT, class traits> +void basic_regex_parser<charT, 
traits>::parse_set_literal(basic_char_set<charT, traits>& char_set) +{ + digraph<charT> start_range(get_next_set_literal(char_set)); + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + // we have a range: + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set) + { + digraph<charT> end_range = get_next_set_literal(char_set); + char_set.add_range(start_range, end_range); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set) + { + // trailing - : + --m_position; + return; + } + fail(regex_constants::error_range, m_position - m_base); + return; + } + return; + } + --m_position; + } + char_set.add_single(start_range); +} + +template <class charT, class traits> +digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set) +{ + digraph<charT> result; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dash: + if(!char_set.empty()) + { + // see if we are at the end of the set: + if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_range, m_position - m_base); + return result; + } + --m_position; + } + result.first = *m_position++; + return result; + case regex_constants::syntax_escape: + // check to see if escapes are supported first: + if(this->flags() & regex_constants::no_escape_in_lists) + { + result = *m_position++; + break; + } + ++m_position; + result = unescape_character(); + break; + case 
regex_constants::syntax_open_set: + { + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) + { + --m_position; + result.first = *m_position; + ++m_position; + return result; + } + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching '.]' + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + ++m_position; + string_type s = this->m_traits.lookup_collatename(name_first, name_last); + if(s.empty() || (s.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + result.first = s[0]; + if(s.size() > 1) + result.second = s[1]; + else + result.second = 0; + return result; + } + default: + result = *m_position++; + } + return result; +} + +// +// does a value fit in the specified charT type? 
+// +// overload selected when sizeof(charT) < sizeof(int): +// true when v has no bits set above the width of charT. +template <class charT> +bool valid_value(charT, int v, const mpl::true_&) +{ + return (v >> (sizeof(charT) * CHAR_BIT)) == 0; +} +// overload selected when charT is at least as wide as int: +template <class charT> +bool valid_value(charT, int, const mpl::false_&) +{ + return true; // v will always fit in a charT +} +// entry point: dispatches on whether charT is narrower than int: +template <class charT> +bool valid_value(charT c, int v) +{ + return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>()); +} + +template <class charT, class traits> +charT basic_regex_parser<charT, traits>::unescape_character() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + charT result(0); + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_control_a: + result = charT('\a'); + break; + case regex_constants::escape_type_e: + result = charT(27); + break; + case regex_constants::escape_type_control_f: + result = charT('\f'); + break; + case regex_constants::escape_type_control_n: + result = charT('\n'); + break; + case regex_constants::escape_type_control_r: + result = charT('\r'); + break; + case regex_constants::escape_type_control_t: + result = charT('\t'); + break; + case regex_constants::escape_type_control_v: + result = charT('\v'); + break; + case regex_constants::escape_type_word_assert: + result = charT('\b'); + break; + case regex_constants::escape_type_ascii_control: + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + /* + if((*m_position < charT('@')) + || (*m_position > charT(125)) ) + { + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + */ + result = static_cast<charT>(*m_position % 32); + break; + case regex_constants::escape_type_hex: + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + // maybe have \x{ddd} + 
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + int i = this->m_traits.toi(m_position, m_end, 16); + if((m_position == m_end) + || (i < 0) + || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)())) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + { + fail(regex_constants::error_badbrace, m_position - m_base); + return result; + } + ++m_position; + result = charT(i); + } + else + { + std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position); + int i = this->m_traits.toi(m_position, m_position + len, 16); + if((i < 0) + || !valid_value(charT(0), i)) + { + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + result = charT(i); + } + return result; + case regex_constants::syntax_digit: + { + // an octal escape sequence, the first character must be a zero + // followed by up to 3 octal digits: + std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4)); + const charT* bp = m_position; + int val = this->m_traits.toi(bp, bp + 1, 8); + if(val != 0) + { + // Oops not an octal escape after all: + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + val = this->m_traits.toi(m_position, m_position + len, 8); + if(val < 0) + { + fail(regex_constants::error_escape, m_position - m_base); + return result; + } + return static_cast<charT>(val); + } + case regex_constants::escape_type_named_char: + { + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // maybe have \N{name} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find 
enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + string_type s = this->m_traits.lookup_collatename(++base, m_position++); + if(s.empty()) + { + fail(regex_constants::error_collate, m_position - m_base); + return false; + } + if(s.size() == 1) + { + return s[0]; + } + } + // fall through is a failure: + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + default: + result = *m_position; + break; + } + ++m_position; + return result; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_backref() +{ + BOOST_ASSERT(m_position != m_end); + const charT* pc = m_position; + int i = this->m_traits.toi(pc, pc + 1, 10); + if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) + { + // not a backref at all but an octal escape sequence: + charT c = unescape_character(); + this->append_literal(c); + } + else if((i > 0) && (this->m_backrefs & (1u << (i-1)))) + { + m_position = pc; + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = i; + pb->icase = this->flags() & regbase::icase; + } + else + { + fail(regex_constants::error_backref, m_position - m_end); + return false; + } + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_QE() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // + // parse a \Q...\E sequence: + // + ++m_position; // skip the Q + const charT* start = m_position; + const charT* end; + do + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape)) + 
++m_position; + if(m_position == m_end) + { + // a \Q...\E sequence may terminate with the end of the expression: + end = m_position; + break; + } + if(++m_position == m_end) // skip the escape + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // check to see if it's a \E: + if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E) + { + ++m_position; + end = m_position - 2; + break; + } + // otherwise go round again: + }while(true); + // + // now add all the character between the two escapes as literals: + // + while(start != end) + { + this->append_literal(*start); + ++start; + } + return true; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::parse_perl_extension() +{ + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + // + // treat comments as a special case, as these + // are the only ones that don't start with a leading + // startmark state: + // + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash) + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) + {} + return true; + } + // + // backup some state, and prepare the way: + // + int markid = 0; + std::ptrdiff_t jump_offset = 0; + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + std::ptrdiff_t expected_alt_point = m_alt_insert_point; + bool restore_flags = true; + regex_constants::syntax_option_type old_flags = this->flags(); + bool old_case_change = 
m_has_case_change; + m_has_case_change = false; + charT name_delim; + int mark_reset = m_mark_reset; + m_mark_reset = -1; + int v; + // + // select the actual extension used: + // + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_or: + m_mark_reset = m_mark_count; + // fall through: + case regex_constants::syntax_colon: + // + // a non-capturing mark: + // + pb->index = markid = 0; + ++m_position; + break; + case regex_constants::syntax_digit: + { + // + // a recursive subexpression: + // + v = this->m_traits.toi(m_position, m_end, 10); + if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } +insert_recursion: + pb->index = markid = 0; + static_cast<re_jump*>(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v; + static_cast<re_case*>( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->flags() & regbase::icase; + break; + } + case regex_constants::syntax_plus: + // + // A forward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + v += m_mark_count; + goto insert_recursion; + case regex_constants::syntax_dash: + // + // Possibly a backward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if(v <= 0) + { + --m_position; + // Oops not a relative recursion at all, but a (?-imsx) group: + goto option_group_jump; + } + v = m_mark_count + 1 - v; + if(v <= 0) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + goto insert_recursion; + case regex_constants::syntax_equal: + pb->index = markid = -1; + ++m_position; + jump_offset = 
this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_not: + pb->index = markid = -2; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::escape_type_left_word: + { + // a lookbehind assertion: + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); + if(t == regex_constants::syntax_not) + pb->index = markid = -2; + else if(t == regex_constants::syntax_equal) + pb->index = markid = -1; + else + { + // Probably a named capture which also starts (?< : + name_delim = '>'; + --m_position; + goto named_capture_jump; + } + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->append_state(syntax_element_backstep, sizeof(re_brace)); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + } + case regex_constants::escape_type_right_word: + // + // an independent sub-expression: + // + pb->index = markid = -3; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_open_mark: + { + // a conditional expression: + pb->index = markid = -4; + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + int v = this->m_traits.toi(m_position, m_end, 10); + if(*m_position == charT('R')) + { + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + 
if(*m_position == charT('&')) + { + const charT* base = ++m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + v = -static_cast<int>(hash_value_from_capture_name(base, m_position)); + } + else + { + v = -this->m_traits.toi(m_position, m_end, 10); + } + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v < 0 ? (v - 1) : 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + else if((*m_position == charT('\'')) || (*m_position == charT('<'))) + { + const charT* base = ++m_position; + while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\''))) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + v = static_cast<int>(hash_value_from_capture_name(base, m_position)); + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v; + if((*m_position != charT('>')) && (*m_position != charT('\'')) || (++m_position == m_end)) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + else if(*m_position == charT('D')) + { + const char* def = "DEFINE"; + while(*def && (m_position != 
m_end) && (*m_position == charT(*def))) + ++m_position, ++def; + if((m_position == m_end) || *def) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = 9999; // special magic value! + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + else if(v > 0) + { + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + else + { + // verify that we have a lookahead or lookbehind assert: + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) + { + if(++m_position == m_end) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + m_position -= 3; + } + else + { + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && 
(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + m_position -= 2; + } + } + break; + } + case regex_constants::syntax_close_mark: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + case regex_constants::escape_type_end_buffer: + { + name_delim = *m_position; +named_capture_jump: + markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + #ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0)); + #else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0)); + #endif + } + pb->index = markid; + const charT* base = ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + while((m_position != m_end) && (*m_position != name_delim)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + this->m_pdata->set_name(base, m_position, markid); + ++m_position; + break; + } + default: + if(*m_position == charT('R')) + { + ++m_position; + v = 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + goto insert_recursion; + } + if(*m_position == charT('&')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + v = static_cast<int>(hash_value_from_capture_name(base, 
m_position)); + goto insert_recursion; + } + if(*m_position == charT('P')) + { + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + if(*m_position == charT('>')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + v = static_cast<int>(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + } + // + // lets assume that we have a (?imsx) group and try and parse it: + // +option_group_jump: + regex_constants::syntax_option_type opts = parse_options(); + if(m_position == m_end) + return false; + // make a note of whether we have a case change: + m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); + pb->index = markid = 0; + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) + { + // update flags and carry on as normal: + this->flags(opts); + restore_flags = false; + old_case_change |= m_has_case_change; // defer end of scope by one ')' + } + else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon) + { + // update flags and carry on until the matching ')' is found: + this->flags(opts); + ++m_position; + } + else + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + + // finally append a case change state if we need it: + if(m_has_case_change) + { + static_cast<re_case*>( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + + } + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind alternatives: + // + if(0 == unwind_alts(last_paren_start)) + return false; + // + // we either 
have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end)); + return false; + } + BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); + ++m_position; + // + // restore the flags: + // + if(restore_flags) + { + // append a case change state if we need it: + if(m_has_case_change) + { + static_cast<re_case*>( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = old_flags & regbase::icase; + } + this->flags(old_flags); + } + // + // set up the jump pointer if we have one: + // + if(jump_offset) + { + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset)); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + if(this->m_last_state == jmp) + { + // Oops... we didn't have anything inside the assertion: + fail(regex_constants::error_empty, m_position - m_base); + return false; + } + } + // + // verify that if this is conditional expression, that we do have + // an alternative, if not add one: + // + if(markid == -4) + { + re_syntax_base* b = this->getaddress(expected_alt_point); + // Make sure we have exactly one alternative following this state: + if(b->type != syntax_element_alt) + { + re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt))); + alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); + } + else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt) + { + // Can't have seen more than one alternative: + fail(regex_constants::error_bad_pattern, m_position - m_base); + return false; + } + else + { + // We must *not* have seen an alternative inside a (DEFINE) block: + b = this->getaddress(b->next.i, b); + if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999)) + { + 
fail(regex_constants::error_bad_pattern, m_position - m_base); + return false; + } + } + // check for invalid repetition of next state: + b = this->getaddress(expected_alt_point); + b = this->getaddress(static_cast<re_alt*>(b)->next.i, b); + if((b->type != syntax_element_assert_backref) + && (b->type != syntax_element_startmark)) + { + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + // + // append closing parenthesis state: + // + pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // and the case change data: + // + m_has_case_change = old_case_change; + // + // And the mark_reset data: + // + if(m_max_mark > m_mark_count) + { + m_mark_count = m_max_mark; + } + m_mark_reset = mark_reset; + + + if(markid > 0) + { +#ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1; +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1; +#endif + // + // allow backrefs to this mark: + // + if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT))) + this->m_backrefs |= 1u << (markid - 1); + } + return true; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate) +{ + // + // parses an emacs style \sx or \Sx construct. 
+ // + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + basic_char_set<charT, traits> char_set; + if(negate) + char_set.negate(); + + static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', }; + + switch(*m_position) + { + case 's': + case ' ': + char_set.add_class(this->m_mask_space); + break; + case 'w': + char_set.add_class(this->m_word_mask); + break; + case '_': + char_set.add_single(digraph<charT>(charT('$'))); + char_set.add_single(digraph<charT>(charT('&'))); + char_set.add_single(digraph<charT>(charT('*'))); + char_set.add_single(digraph<charT>(charT('+'))); + char_set.add_single(digraph<charT>(charT('-'))); + char_set.add_single(digraph<charT>(charT('_'))); + char_set.add_single(digraph<charT>(charT('<'))); + char_set.add_single(digraph<charT>(charT('>'))); + break; + case '.': + char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5)); + break; + case '(': + char_set.add_single(digraph<charT>(charT('('))); + char_set.add_single(digraph<charT>(charT('['))); + char_set.add_single(digraph<charT>(charT('{'))); + break; + case ')': + char_set.add_single(digraph<charT>(charT(')'))); + char_set.add_single(digraph<charT>(charT(']'))); + char_set.add_single(digraph<charT>(charT('}'))); + break; + case '"': + char_set.add_single(digraph<charT>(charT('"'))); + char_set.add_single(digraph<charT>(charT('\''))); + char_set.add_single(digraph<charT>(charT('`'))); + break; + case '\'': + char_set.add_single(digraph<charT>(charT('\''))); + char_set.add_single(digraph<charT>(charT(','))); + char_set.add_single(digraph<charT>(charT('#'))); + break; + case '<': + char_set.add_single(digraph<charT>(charT(';'))); + break; + case '>': + char_set.add_single(digraph<charT>(charT('\n'))); + char_set.add_single(digraph<charT>(charT('\f'))); + break; + default: + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + if(0 == this->append_set(char_set)) + { + 
fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; +} + +template <class charT, class traits> +regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options() +{ + // we have a (?imsx-imsx) group, convert it into a set of flags: + regex_constants::syntax_option_type f = this->flags(); + bool breakout = false; + do + { + switch(*m_position) + { + case 's': + f |= regex_constants::mod_s; + f &= ~regex_constants::no_mod_s; + break; + case 'm': + f &= ~regex_constants::no_mod_m; + break; + case 'i': + f |= regex_constants::icase; + break; + case 'x': + f |= regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + + if(*m_position == static_cast<charT>('-')) + { + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + do + { + switch(*m_position) + { + case 's': + f &= ~regex_constants::mod_s; + f |= regex_constants::no_mod_s; + break; + case 'm': + f |= regex_constants::no_mod_m; + break; + case 'i': + f &= ~regex_constants::icase; + break; + case 'x': + f &= ~regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + } + return f; +} + +template <class charT, class traits> +bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start) +{ + // + // If we didn't actually add any states after the last + // alternative then that's an error: + // + if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size())) + && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + 
((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base); + return false; + } + // + // Fix up our alternatives: + // + while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)) + { + // + // fix up the jump to point to the end of the states + // that we've just added: + // + std::ptrdiff_t jump_offset = m_alt_jumps.back(); + m_alt_jumps.pop_back(); + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset)); + BOOST_ASSERT(jmp->type == syntax_element_jump); + jmp->alt.i = this->m_pdata->m_data.size() - jump_offset; + } + return true; +} + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail +} // namespace boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/ext/boost/regex/v4/c_regex_traits.hpp b/ext/boost/regex/v4/c_regex_traits.hpp new file mode 100644 index 0000000000..d99b0f341b --- /dev/null +++ b/ext/boost/regex/v4/c_regex_traits.hpp @@ -0,0 +1,211 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE c_regex_traits.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression traits class that wraps the global C locale. 
+ */ + +#ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_C_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include <boost/regex/config.hpp> +#endif +#ifndef BOOST_REGEX_WORKAROUND_HPP +#include <boost/regex/v4/regex_workaround.hpp> +#endif + +#include <cctype> + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::strlen; using ::tolower; +} +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace boost{ + +template <class charT> +struct c_regex_traits; + +template<> +struct BOOST_REGEX_DECL c_regex_traits<char> +{ + c_regex_traits(){} + typedef char char_type; + typedef std::size_t size_type; + typedef std::string string_type; + struct locale_type{}; + typedef boost::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::strlen)(p); + } + + char translate(char c) const + { + return c; + } + char translate_nocase(char c) const + { + return static_cast<char>((std::tolower)(static_cast<unsigned char>(c))); + } + + static string_type BOOST_REGEX_CALL transform(const char* p1, const char* p2); + static string_type BOOST_REGEX_CALL transform_primary(const char* p1, const char* p2); + + static char_class_type BOOST_REGEX_CALL lookup_classname(const char* p1, const char* p2); + static string_type BOOST_REGEX_CALL lookup_collatename(const char* p1, const char* p2); + + static bool BOOST_REGEX_CALL isctype(char, char_class_type); + static int BOOST_REGEX_CALL value(char, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#ifndef BOOST_NO_WREGEX +template<> +struct BOOST_REGEX_DECL c_regex_traits<wchar_t> +{ + c_regex_traits(){} + typedef wchar_t 
char_type; + typedef std::size_t size_type; + typedef std::wstring string_type; + struct locale_type{}; + typedef boost::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::wcslen)(p); + } + + wchar_t translate(wchar_t c) const + { + return c; + } + wchar_t translate_nocase(wchar_t c) const + { + return (std::towlower)(c); + } + + static string_type BOOST_REGEX_CALL transform(const wchar_t* p1, const wchar_t* p2); + static string_type BOOST_REGEX_CALL transform_primary(const wchar_t* p1, const wchar_t* p2); + + static char_class_type BOOST_REGEX_CALL lookup_classname(const wchar_t* p1, const wchar_t* p2); + static string_type BOOST_REGEX_CALL lookup_collatename(const wchar_t* p1, const wchar_t* p2); + + static bool BOOST_REGEX_CALL isctype(wchar_t, char_class_type); + static int BOOST_REGEX_CALL value(wchar_t, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +// +// Provide an unsigned short version as well, so the user can link to this +// no matter whether they build with /Zc:wchar_t or not (MSVC specific). 
+// +template<> +struct BOOST_REGEX_DECL c_regex_traits<unsigned short> +{ + c_regex_traits(){} + typedef unsigned short char_type; + typedef std::size_t size_type; + typedef std::basic_string<unsigned short> string_type; + struct locale_type{}; + typedef boost::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::wcslen)((const wchar_t*)p); + } + + unsigned short translate(unsigned short c) const + { + return c; + } + unsigned short translate_nocase(unsigned short c) const + { + return (std::towlower)((wchar_t)c); + } + + static string_type BOOST_REGEX_CALL transform(const unsigned short* p1, const unsigned short* p2); + static string_type BOOST_REGEX_CALL transform_primary(const unsigned short* p1, const unsigned short* p2); + + static char_class_type BOOST_REGEX_CALL lookup_classname(const unsigned short* p1, const unsigned short* p2); + static string_type BOOST_REGEX_CALL lookup_collatename(const unsigned short* p1, const unsigned short* p2); + + static bool BOOST_REGEX_CALL isctype(unsigned short, char_class_type); + static int BOOST_REGEX_CALL value(unsigned short, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#endif + +#endif // BOOST_NO_WREGEX + +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + + diff --git a/ext/boost/regex/v4/char_regex_traits.hpp b/ext/boost/regex/v4/char_regex_traits.hpp new file mode 100644 index 0000000000..e8a501ca00 --- /dev/null +++ b/ext/boost/regex/v4/char_regex_traits.hpp @@ -0,0 +1,81 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software 
License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE char_regex_traits.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares deprecated traits classes char_regex_traits<>. + */ + + +#ifndef BOOST_REGEX_V4_CHAR_REGEX_TRAITS_HPP +#define BOOST_REGEX_V4_CHAR_REGEX_TRAITS_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace boost{ + +namespace deprecated{ +// +// class char_regex_traits_i +// provides case insensitive traits classes (deprecated): +template <class charT> +class char_regex_traits_i : public regex_traits<charT> {}; + +template<> +class char_regex_traits_i<char> : public regex_traits<char> +{ +public: + typedef char char_type; + typedef unsigned char uchar_type; + typedef unsigned int size_type; + typedef regex_traits<char> base_type; + +}; + +#ifndef BOOST_NO_WREGEX +template<> +class char_regex_traits_i<wchar_t> : public regex_traits<wchar_t> +{ +public: + typedef wchar_t char_type; + typedef unsigned short uchar_type; + typedef unsigned int size_type; + typedef regex_traits<wchar_t> base_type; + +}; +#endif +} // namespace deprecated +} // namespace boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif // include + diff --git a/ext/boost/regex/v4/cpp_regex_traits.hpp b/ext/boost/regex/v4/cpp_regex_traits.hpp new file mode 100644 index 0000000000..7ce3ed300e --- /dev/null +++ b/ext/boost/regex/v4/cpp_regex_traits.hpp @@ -0,0 +1,1086 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, 
Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cpp_regex_traits.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression traits class cpp_regex_traits. + */ + +#ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED + +#include <boost/config.hpp> + +#ifndef BOOST_NO_STD_LOCALE + +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#include <boost/regex/pattern_except.hpp> +#endif +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#include <boost/regex/v4/regex_traits_defaults.hpp> +#endif +#ifdef BOOST_HAS_THREADS +#include <boost/regex/pending/static_mutex.hpp> +#endif +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#include <boost/regex/v4/primary_transform.hpp> +#endif +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#include <boost/regex/pending/object_cache.hpp> +#endif + +#include <istream> +#include <ios> +#include <climits> + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4786) +#endif + +namespace boost{ + +// +// forward declaration is needed by some compilers: +// +template <class charT> +class cpp_regex_traits; + +namespace re_detail{ + +// +// class parser_buf: +// acts as a stream buffer which wraps around a pair of pointers: +// +template <class charT, + class traits = ::std::char_traits<charT> > +class parser_buf : public ::std::basic_streambuf<charT, traits> +{ + typedef ::std::basic_streambuf<charT, traits> base_type; + typedef typename base_type::int_type int_type; + typedef typename base_type::char_type char_type; + typedef typename base_type::pos_type pos_type; + typedef ::std::streamsize streamsize; + typedef typename base_type::off_type off_type; +public: + 
parser_buf() : base_type() { setbuf(0, 0); } + const charT* getnext() { return this->gptr(); } +protected: + std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n); + typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which); + typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which); +private: + parser_buf& operator=(const parser_buf&); + parser_buf(const parser_buf&); +}; + +template<class charT, class traits> +std::basic_streambuf<charT, traits>* +parser_buf<charT, traits>::setbuf(char_type* s, streamsize n) +{ + this->setg(s, s, s + n); + return this; +} + +template<class charT, class traits> +typename parser_buf<charT, traits>::pos_type +parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + std::ptrdiff_t size = this->egptr() - this->eback(); + std::ptrdiff_t pos = this->gptr() - this->eback(); + charT* g = this->eback(); + switch(way) + { + case ::std::ios_base::beg: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + off, g + size); + break; + case ::std::ios_base::end: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + size - off, g + size); + break; + case ::std::ios_base::cur: + { + std::ptrdiff_t newpos = static_cast<std::ptrdiff_t>(pos + off); + if((newpos < 0) || (newpos > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + newpos, g + size); + break; + } + default: ; + } +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4244) +#endif + return static_cast<pos_type>(this->gptr() - this->eback()); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template<class charT, class traits> +typename parser_buf<charT, traits>::pos_type +parser_buf<charT, traits>::seekpos(pos_type sp, 
//
// class cpp_regex_traits_base:
// acts as a container for locale and the facets we are using.
//
// The comparison operators look only at the cached facet pointers (not at
// m_locale itself): operator== requires every cached facet pointer to match,
// and operator< orders objects lexicographically on the same pointers so
// that the two operators agree with each other.
//
template <class charT>
struct cpp_regex_traits_base
{
   // caches the facets of l (see imbue):
   cpp_regex_traits_base(const std::locale& l)
   { imbue(l); }
   // switches to locale l and returns the previously stored locale:
   std::locale imbue(const std::locale& l);

   std::locale m_locale;
   std::ctype<charT> const* m_pctype;
#ifndef BOOST_NO_STD_MESSAGES
   std::messages<charT> const* m_pmessages;
#endif
   std::collate<charT> const* m_pcollate;

   // ordering on (m_pctype, m_pmessages, m_pcollate):
   bool operator<(const cpp_regex_traits_base& b)const
   {
      if(m_pctype == b.m_pctype)
      {
#ifndef BOOST_NO_STD_MESSAGES
         if(m_pmessages == b.m_pmessages)
         {
            // the collate facet is the final tie-breaker; without this
            // comparison two objects that differ only in m_pcollate would be
            // equivalent under operator< yet unequal under operator==.
            return m_pcollate < b.m_pcollate;
         }
         return m_pmessages < b.m_pmessages;
#else
         return m_pcollate < b.m_pcollate;
#endif
      }
      return m_pctype < b.m_pctype;
   }
   // equality on the same set of facet pointers:
   bool operator==(const cpp_regex_traits_base& b)const
   {
      return (m_pctype == b.m_pctype)
#ifndef BOOST_NO_STD_MESSAGES
         && (m_pmessages == b.m_pmessages)
#endif
         && (m_pcollate == b.m_pcollate);
   }
};
typedef typename map_type::const_iterator map_iterator_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base<charT>(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b) + : cpp_regex_traits_base<charT>(b) + { + init(); + } + void init(); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class; + if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template <class charT> +void cpp_regex_traits_char_layer<charT>::init() +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: +#ifndef BOOST_NO_STD_MESSAGES +#ifndef __IBMCPP__ + typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1); +#else + typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits<charT>::get_catalog_name()); + if(cat_name.size()) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; 
++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[mss[j]] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + m_char_map[this->m_pctype->widen(*ptr)] = i; + ++ptr; + } + } +#ifndef BOOST_NO_STD_MESSAGES + } +#endif +} + +template <class charT> +typename cpp_regex_traits_char_layer<charT>::string_type + cpp_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, this->m_pctype->widen(*ptr)); + ++ptr; + } + return result; +} + +// +// specialised version for narrow characters: +// +template <> +class BOOST_REGEX_DECL cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char> +{ + typedef std::string string_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base<char>(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l) + : cpp_regex_traits_base<char>(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast<unsigned char>(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast<unsigned char>(c)]; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + void init(); +}; + +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +enum +{ + char_class_space=1<<0, + char_class_print=1<<1, + char_class_cntrl=1<<2, + char_class_upper=1<<3, + char_class_lower=1<<4, + char_class_alpha=1<<5, + char_class_digit=1<<6, + char_class_punct=1<<7, + 
char_class_xdigit=1<<8, + char_class_alnum=char_class_alpha|char_class_digit, + char_class_graph=char_class_alnum|char_class_punct, + char_class_blank=1<<9, + char_class_word=1<<10, + char_class_unicode=1<<11, + char_class_horizontal_space=1<<12, + char_class_vertical_space=1<<13 +}; + +#endif + +// +// class cpp_regex_traits_implementation: +// provides pimpl implementation for cpp_regex_traits. +// +template <class charT> +class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT> +{ +public: + typedef typename cpp_regex_traits<charT>::char_class_type char_class_type; + typedef typename std::ctype<charT>::mask native_mask_type; +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24); + BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25); + BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26); + BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27); + BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28); +#endif + + typedef std::basic_string<charT> string_type; + typedef charT char_type; + //cpp_regex_traits_implementation(); + cpp_regex_traits_implementation(const std::locale& l) + : cpp_regex_traits_char_layer<charT>(l) + { + init(); + } + cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l) + : cpp_regex_traits_char_layer<charT>(l) + { + init(); + } + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map<int, std::string>::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? 
std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + string_type temp(p1, p2); + this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size()); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const; +private: + std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID + std::map<string_type, char_class_type> m_custom_class_names; // character class names + std::map<string_type, string_type> m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; + void init(); +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +public: + bool isctype(charT c, char_class_type m)const; +#endif +}; + +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET +#if !defined(BOOST_NO_INCLASS_MEMBER_INITIALIZATION) + +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_blank; +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word; +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode; +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical; +template 
<class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal; + +#endif +#endif + +template <class charT> +typename cpp_regex_traits_implementation<charT>::string_type + cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_ASSERT(*p2 == 0); + + string_type result; + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + try{ + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size()); + result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->m_pcollate->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->m_pcollate->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + }catch(...){} + while(result.size() && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + if(result.empty()) + { + // character is ignorable at the primary level: + result = string_type(1, 
charT(0)); + } + return result; +} + +template <class charT> +typename cpp_regex_traits_implementation<charT>::string_type + cpp_regex_traits_implementation<charT>::transform(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_ASSERT(*p2 == 0); + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + string_type result; + try{ + result = this->m_pcollate->transform(p1, p2); + // + // Borland's STLPort version returns a NULL-terminated + // string that has garbage at the end - each call to + // std::collate<wchar_t>::transform returns a different string! + // So as a workaround, we'll truncate the string at the first NULL + // which _seems_ to work.... +#if BOOST_WORKAROUND(__BORLANDC__, < 0x580) + result.erase(result.find(charT(0))); +#else + // + // some implementations (Dinkumware) append unnecessary trailing \0's: + while(result.size() && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); +#endif + BOOST_ASSERT(std::find(result.begin(), result.end(), charT(0)) == result.end()); + } + catch(...) 
+ { + } + return result; +} + + +template <class charT> +typename cpp_regex_traits_implementation<charT>::string_type + cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map<string_type, string_type>::const_iterator iter_type; + if(m_custom_collate_names.size()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + std::string name(p1, p2); +#else + std::string name; + const charT* p0 = p1; + while(p0 != p2) + name.append(1, char(*p0++)); +#endif + name = lookup_default_collate_name(name); +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + if(name.size()) + return string_type(name.begin(), name.end()); +#else + if(name.size()) + { + string_type result; + typedef std::string::const_iterator iter; + iter b = name.begin(); + iter e = name.end(); + while(b != e) + result.append(1, charT(*b++)); + return result; + } +#endif + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template <class charT> +void cpp_regex_traits_implementation<charT>::init() +{ +#ifndef BOOST_NO_STD_MESSAGES +#ifndef __IBMCPP__ + typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1); +#else + typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits<charT>::get_catalog_name()); + if(cat_name.size()) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::re_detail::raise_runtime_error(err); + } + } + // + // 
if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { + // + // Error messages: + // + for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0); + i <= boost::regex_constants::error_unknown; + i = static_cast<boost::regex_constants::error_type>(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, this->m_pctype->widen(*p)); + ++p; + } + string_type s = this->m_pmessages->get(cat, 0, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, this->m_pctype->narrow(s[j], 0)); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + static const char_class_type masks[16] = + { + std::ctype<charT>::alnum, + std::ctype<charT>::alpha, + std::ctype<charT>::cntrl, + std::ctype<charT>::digit, + std::ctype<charT>::graph, + cpp_regex_traits_implementation<charT>::mask_horizontal, + std::ctype<charT>::lower, + std::ctype<charT>::print, + std::ctype<charT>::punct, + std::ctype<charT>::space, + std::ctype<charT>::upper, + cpp_regex_traits_implementation<charT>::mask_vertical, + std::ctype<charT>::xdigit, + cpp_regex_traits_implementation<charT>::mask_blank, + cpp_regex_traits_implementation<charT>::mask_word, + cpp_regex_traits_implementation<charT>::mask_unicode, + }; +#else + static const char_class_type masks[14] = + { + ::boost::re_detail::char_class_alnum, + ::boost::re_detail::char_class_alpha, + ::boost::re_detail::char_class_cntrl, + ::boost::re_detail::char_class_digit, + ::boost::re_detail::char_class_graph, + ::boost::re_detail::char_class_horizontal_space, + ::boost::re_detail::char_class_lower, + ::boost::re_detail::char_class_print, + ::boost::re_detail::char_class_punct, + ::boost::re_detail::char_class_space, + ::boost::re_detail::char_class_upper, + ::boost::re_detail::char_class_vertical_space, + 
::boost::re_detail::char_class_xdigit, + ::boost::re_detail::char_class_blank, + ::boost::re_detail::char_class_word, + ::boost::re_detail::char_class_unicode, + }; +#endif + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(this->m_pmessages->get(cat, 0, j+300, null_string)); + if(s.size()) + this->m_custom_class_names[s] = masks[j]; + } + } +#endif + // + // get the collation format used by m_pcollate: + // + m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim); +} + +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type + cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const +{ +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + static const char_class_type masks[22] = + { + 0, + std::ctype<char>::alnum, + std::ctype<char>::alpha, + cpp_regex_traits_implementation<charT>::mask_blank, + std::ctype<char>::cntrl, + std::ctype<char>::digit, + std::ctype<char>::digit, + std::ctype<char>::graph, + cpp_regex_traits_implementation<charT>::mask_horizontal, + std::ctype<char>::lower, + std::ctype<char>::lower, + std::ctype<char>::print, + std::ctype<char>::punct, + std::ctype<char>::space, + std::ctype<char>::space, + std::ctype<char>::upper, + cpp_regex_traits_implementation<charT>::mask_unicode, + std::ctype<char>::upper, + cpp_regex_traits_implementation<charT>::mask_vertical, + std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word, + std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word, + std::ctype<char>::xdigit, + }; +#else + static const char_class_type masks[22] = + { + 0, + ::boost::re_detail::char_class_alnum, + ::boost::re_detail::char_class_alpha, + ::boost::re_detail::char_class_blank, + ::boost::re_detail::char_class_cntrl, + ::boost::re_detail::char_class_digit, + ::boost::re_detail::char_class_digit, + ::boost::re_detail::char_class_graph, + 
::boost::re_detail::char_class_horizontal_space, + ::boost::re_detail::char_class_lower, + ::boost::re_detail::char_class_lower, + ::boost::re_detail::char_class_print, + ::boost::re_detail::char_class_punct, + ::boost::re_detail::char_class_space, + ::boost::re_detail::char_class_space, + ::boost::re_detail::char_class_upper, + ::boost::re_detail::char_class_unicode, + ::boost::re_detail::char_class_upper, + ::boost::re_detail::char_class_vertical_space, + ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word, + ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word, + ::boost::re_detail::char_class_xdigit, + }; +#endif + if(m_custom_class_names.size()) + { + typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2); + BOOST_ASSERT(state_id < sizeof(masks) / sizeof(masks[0])); + return masks[state_id]; +} + +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +template <class charT> +bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_type mask) const +{ + return + ((mask & ::boost::re_detail::char_class_space) && (m_pctype->is(std::ctype<charT>::space, c))) + || ((mask & ::boost::re_detail::char_class_print) && (m_pctype->is(std::ctype<charT>::print, c))) + || ((mask & ::boost::re_detail::char_class_cntrl) && (m_pctype->is(std::ctype<charT>::cntrl, c))) + || ((mask & ::boost::re_detail::char_class_upper) && (m_pctype->is(std::ctype<charT>::upper, c))) + || ((mask & ::boost::re_detail::char_class_lower) && (m_pctype->is(std::ctype<charT>::lower, c))) + || ((mask & ::boost::re_detail::char_class_alpha) && (m_pctype->is(std::ctype<charT>::alpha, c))) + || ((mask & ::boost::re_detail::char_class_digit) && (m_pctype->is(std::ctype<charT>::digit, c))) + || ((mask & 
::boost::re_detail::char_class_punct) && (m_pctype->is(std::ctype<charT>::punct, c))) + || ((mask & ::boost::re_detail::char_class_xdigit) && (m_pctype->is(std::ctype<charT>::xdigit, c))) + || ((mask & ::boost::re_detail::char_class_blank) && (m_pctype->is(std::ctype<charT>::space, c)) && !::boost::re_detail::is_separator(c)) + || ((mask & ::boost::re_detail::char_class_word) && (c == '_')) + || ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c)) + || ((mask & ::boost::re_detail::char_class_vertical) && (is_separator(c) || (c == '\v'))) + || ((mask & ::boost::re_detail::char_class_horizontal) && m_pctype->is(std::ctype<charT>::space, c) && !(is_separator(c) || (c == '\v'))); +} +#endif + + +template <class charT> +inline boost::shared_ptr<const cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) +{ + cpp_regex_traits_base<charT> key(l); + return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5); +} + +} // re_detail + +template <class charT> +class cpp_regex_traits +{ +private: + typedef std::ctype<charT> ctype_type; +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string<char_type> string_type; + typedef std::locale locale_type; + typedef boost::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + cpp_regex_traits() + : m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits<charT>::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + 
} + charT translate(charT c, bool icase) const + { + return icase ? m_pimpl->m_pctype->tolower(c) : c; + } + charT tolower(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT toupper(charT c) const + { + return m_pimpl->m_pctype->toupper(c); + } + string_type transform(const charT* p1, const charT* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + typedef typename std::ctype<charT>::mask ctype_mask; + + static const ctype_mask mask_base = + static_cast<ctype_mask>( + std::ctype<charT>::alnum + | std::ctype<charT>::alpha + | std::ctype<charT>::cntrl + | std::ctype<charT>::digit + | std::ctype<charT>::graph + | std::ctype<charT>::lower + | std::ctype<charT>::print + | std::ctype<charT>::punct + | std::ctype<charT>::space + | std::ctype<charT>::upper + | std::ctype<charT>::xdigit); + + if((f & mask_base) + && (m_pimpl->m_pctype->is( + static_cast<ctype_mask>(f & mask_base), c))) + return true; + else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c)) + return true; + else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_')) + return true; + else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank) + && m_pimpl->m_pctype->is(std::ctype<charT>::space, c) + && !re_detail::is_separator(c)) + return true; + else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_vertical) + && (::boost::re_detail::is_separator(c) || (c == '\v'))) + return true; + else if((f & 
re_detail::cpp_regex_traits_implementation<charT>::mask_horizontal) + && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation<charT>::mask_vertical)) + return true; + return false; +#else + return m_pimpl->isctype(c, f); +#endif + } + int toi(const charT*& p1, const charT* p2, int radix)const; + int value(charT c, int radix)const + { + const charT* pc = &c; + return toi(pc, pc + 1, radix); + } + locale_type imbue(locale_type l) + { + std::locale result(getloc()); + m_pimpl = re_detail::create_cpp_regex_traits<charT>(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + boost::shared_ptr<const re_detail::cpp_regex_traits_implementation<charT> > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static static_mutex& get_mutex_inst(); +#endif +}; + + +template <class charT> +int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const +{ + re_detail::parser_buf<charT> sbuf; // buffer for parsing numbers. + std::basic_istream<charT> is(&sbuf); // stream for parsing numbers. 
+ + // we do NOT want to parse any thousands separators inside the stream: + last = std::find(first, last, BOOST_USE_FACET(std::numpunct<charT>, is.getloc()).thousands_sep()); + + sbuf.pubsetbuf(const_cast<charT*>(static_cast<const charT*>(first)), static_cast<std::streamsize>(last-first)); + is.clear(); + if(std::abs(radix) == 16) is >> std::hex; + else if(std::abs(radix) == 8) is >> std::oct; + else is >> std::dec; + int val; + if(is >> val) + { + first = first + ((last - first) - sbuf.in_avail()); + return val; + } + else + return -1; +} + +template <class charT> +std::string cpp_regex_traits<charT>::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template <class charT> +std::string& cpp_regex_traits<charT>::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template <class charT> +std::string cpp_regex_traits<charT>::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template <class charT> +static_mutex& cpp_regex_traits<charT>::get_mutex_inst() +{ + static static_mutex s_mutex = BOOST_STATIC_MUTEX_INIT; + return s_mutex; +} +#endif + + +} // boost + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + +#endif + + diff --git a/ext/boost/regex/v4/cregex.hpp b/ext/boost/regex/v4/cregex.hpp new file mode 100644 index 0000000000..cafe396cd9 --- /dev/null +++ b/ext/boost/regex/v4/cregex.hpp @@ -0,0 +1,329 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are 
subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cregex.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares POSIX API functions + * + boost::RegEx high level wrapper. + */ + +#ifndef BOOST_RE_CREGEX_HPP_INCLUDED +#define BOOST_RE_CREGEX_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include <boost/regex/config.hpp> +#endif +#include <boost/regex/v4/match_flags.hpp> +#include <boost/regex/v4/error_type.hpp> + +#ifdef __cplusplus +#include <cstddef> +#else +#include <stddef.h> +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +/* include these defs only for POSIX compatablity */ +#ifdef __cplusplus +namespace boost{ +extern "C" { +#endif + +#if defined(__cplusplus) && !defined(BOOST_NO_STDC_NAMESPACE) +typedef std::ptrdiff_t regoff_t; +typedef std::size_t regsize_t; +#else +typedef ptrdiff_t regoff_t; +typedef size_t regsize_t; +#endif + +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const char* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tA; + +#ifndef BOOST_NO_WREGEX +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const wchar_t* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tW; +#endif + +typedef struct +{ + regoff_t rm_so; /* start of match */ + regoff_t rm_eo; 
/* end of match */ +} regmatch_t; + +/* regcomp() flags */ +typedef enum{ + REG_BASIC = 0000, + REG_EXTENDED = 0001, + REG_ICASE = 0002, + REG_NOSUB = 0004, + REG_NEWLINE = 0010, + REG_NOSPEC = 0020, + REG_PEND = 0040, + REG_DUMP = 0200, + REG_NOCOLLATE = 0400, + REG_ESCAPE_IN_LISTS = 01000, + REG_NEWLINE_ALT = 02000, + REG_PERLEX = 04000, + + REG_PERL = REG_EXTENDED | REG_NOCOLLATE | REG_ESCAPE_IN_LISTS | REG_PERLEX, + REG_AWK = REG_EXTENDED | REG_ESCAPE_IN_LISTS, + REG_GREP = REG_BASIC | REG_NEWLINE_ALT, + REG_EGREP = REG_EXTENDED | REG_NEWLINE_ALT, + + REG_ASSERT = 15, + REG_INVARG = 16, + REG_ATOI = 255, /* convert name to number (!) */ + REG_ITOA = 0400 /* convert number to name (!) */ +} reg_comp_flags; + +/* regexec() flags */ +typedef enum{ + REG_NOTBOL = 00001, + REG_NOTEOL = 00002, + REG_STARTEND = 00004 +} reg_exec_flags; + +// +// POSIX error codes: +// +typedef unsigned reg_error_t; +typedef reg_error_t reg_errcode_t; // backwards compatibility + +static const reg_error_t REG_NOERROR = 0; /* Success. */ +static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ +static const reg_error_t REG_BADPAT = 2; /* Invalid pattern. */ +static const reg_error_t REG_ECOLLATE = 3; /* Undefined collating element. */ +static const reg_error_t REG_ECTYPE = 4; /* Invalid character class name. */ +static const reg_error_t REG_EESCAPE = 5; /* Trailing backslash. */ +static const reg_error_t REG_ESUBREG = 6; /* Invalid back reference. */ +static const reg_error_t REG_EBRACK = 7; /* Unmatched left bracket. */ +static const reg_error_t REG_EPAREN = 8; /* Parenthesis imbalance. */ +static const reg_error_t REG_EBRACE = 9; /* Unmatched \{. */ +static const reg_error_t REG_BADBR = 10; /* Invalid contents of \{\}. */ +static const reg_error_t REG_ERANGE = 11; /* Invalid range end. */ +static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. 
*/ +static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. */ +static const reg_error_t REG_EEND = 14; /* unexpected end of expression */ +static const reg_error_t REG_ESIZE = 15; /* expression too big */ +static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */ +static const reg_error_t REG_EMPTY = 17; /* empty expression */ +static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ +static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ +static const reg_error_t REG_ESTACK = 19; /* out of stack space */ +static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */ +static const reg_error_t REG_ENOSYS = 20; /* = REG_E_UNKNOWN : Reserved. */ + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA*, const char*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int, const regex_tA*, char*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA*, const char*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA*); + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW*, const wchar_t*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorW(int, const regex_tW*, wchar_t*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW*, const wchar_t*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW*); +#endif + +#ifdef UNICODE +#define regcomp regcompW +#define regerror regerrorW +#define regexec regexecW +#define regfree regfreeW +#define regex_t regex_tW +#else +#define regcomp regcompA +#define regerror regerrorA +#define regexec regexecA +#define regfree regfreeA +#define regex_t regex_tA +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef 
__cplusplus +} // extern "C" +} // namespace +#endif + +// +// C++ high level wrapper goes here: +// +#if defined(__cplusplus) +#include <string> +#include <vector> +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +class RegEx; + +namespace re_detail{ + +class RegExData; +struct pred1; +struct pred2; +struct pred3; +struct pred4; + +} // namespace re_detail + +#if (defined(BOOST_MSVC) || defined(__BORLANDC__)) && !defined(BOOST_DISABLE_WIN32) +typedef bool (__cdecl *GrepCallback)(const RegEx& expression); +typedef bool (__cdecl *GrepFileCallback)(const char* file, const RegEx& expression); +typedef bool (__cdecl *FindFilesCallback)(const char* file); +#else +typedef bool (*GrepCallback)(const RegEx& expression); +typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression); +typedef bool (*FindFilesCallback)(const char* file); +#endif + +class BOOST_REGEX_DECL RegEx +{ +private: + re_detail::RegExData* pdata; +public: + RegEx(); + RegEx(const RegEx& o); + ~RegEx(); + explicit RegEx(const char* c, bool icase = false); + explicit RegEx(const std::string& s, bool icase = false); + RegEx& operator=(const RegEx& o); + RegEx& operator=(const char* p); + RegEx& operator=(const std::string& s){ return this->operator=(s.c_str()); } + unsigned int SetExpression(const char* p, bool icase = false); + unsigned int SetExpression(const std::string& s, bool icase = false){ return SetExpression(s.c_str(), icase); } + std::string Expression()const; + unsigned int error_code()const; + // + // now matching operators: + // + bool Match(const char* p, match_flag_type flags = match_default); + bool Match(const std::string& s, match_flag_type flags = match_default) { return Match(s.c_str(), flags); } + bool Search(const char* p, match_flag_type flags = match_default); + bool Search(const std::string& s, 
match_flag_type flags = match_default) { return Search(s.c_str(), flags); } + unsigned int Grep(GrepCallback cb, const char* p, match_flag_type flags = match_default); + unsigned int Grep(GrepCallback cb, const std::string& s, match_flag_type flags = match_default) { return Grep(cb, s.c_str(), flags); } + unsigned int Grep(std::vector<std::string>& v, const char* p, match_flag_type flags = match_default); + unsigned int Grep(std::vector<std::string>& v, const std::string& s, match_flag_type flags = match_default) { return Grep(v, s.c_str(), flags); } + unsigned int Grep(std::vector<std::size_t>& v, const char* p, match_flag_type flags = match_default); + unsigned int Grep(std::vector<std::size_t>& v, const std::string& s, match_flag_type flags = match_default) { return Grep(v, s.c_str(), flags); } +#ifndef BOOST_REGEX_NO_FILEITER + unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, match_flag_type flags = match_default); + unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, match_flag_type flags = match_default) { return GrepFiles(cb, files.c_str(), recurse, flags); } + unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, match_flag_type flags = match_default); + unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, match_flag_type flags = match_default) { return FindFiles(cb, files.c_str(), recurse, flags); } +#endif + + std::string Merge(const std::string& in, const std::string& fmt, + bool copy = true, match_flag_type flags = match_default); + std::string Merge(const char* in, const char* fmt, + bool copy = true, match_flag_type flags = match_default); + + std::size_t Split(std::vector<std::string>& v, std::string& s, match_flag_type flags = match_default, unsigned max_count = ~0); + // + // now operators for returning what matched in more detail: + // + std::size_t Position(int i = 0)const; + std::size_t Length(int i 
= 0)const; + bool Matched(int i = 0)const; + std::size_t Marks()const; + std::string What(int i = 0)const; + std::string operator[](int i)const { return What(i); } + + static const std::size_t npos; + + friend struct re_detail::pred1; + friend struct re_detail::pred2; + friend struct re_detail::pred3; + friend struct re_detail::pred4; +}; + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif + +#endif // include guard + + + + + + + + + + diff --git a/ext/boost/regex/v4/error_type.hpp b/ext/boost/regex/v4/error_type.hpp new file mode 100644 index 0000000000..b6633a0092 --- /dev/null +++ b/ext/boost/regex/v4/error_type.hpp @@ -0,0 +1,58 @@ +/* + * + * Copyright (c) 2003-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE error_type.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression error type enumerator. 
+ */ + +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#define BOOST_REGEX_ERROR_TYPE_HPP + +#ifdef __cplusplus +namespace boost{ +#endif + +#ifdef __cplusplus +namespace regex_constants{ + +enum error_type{ + + error_ok = 0, // not used + error_no_match = 1, // not used + error_bad_pattern = 2, + error_collate = 3, + error_ctype = 4, + error_escape = 5, + error_backref = 6, + error_brack = 7, + error_paren = 8, + error_brace = 9, + error_badbrace = 10, + error_range = 11, + error_space = 12, + error_badrepeat = 13, + error_end = 14, // not used + error_size = 15, + error_right_paren = 16, // not used + error_empty = 17, + error_complexity = 18, + error_stack = 19, + error_unknown = 20 +}; + +} +} +#endif // __cplusplus + +#endif diff --git a/ext/boost/regex/v4/fileiter.hpp b/ext/boost/regex/v4/fileiter.hpp new file mode 100644 index 0000000000..f13c4b2fb7 --- /dev/null +++ b/ext/boost/regex/v4/fileiter.hpp @@ -0,0 +1,455 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE fileiter.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares various platform independent file and + * directory iterators, plus binary file input in + * the form of class map_file. 
+ */ + +#ifndef BOOST_RE_FILEITER_HPP_INCLUDED +#define BOOST_RE_FILEITER_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include <boost/regex/config.hpp> +#endif +#include <boost/assert.hpp> + +#ifndef BOOST_REGEX_NO_FILEITER + +#if (defined(__CYGWIN__) || defined(__CYGWIN32__)) && !defined(BOOST_REGEX_NO_W32) +#error "Sorry, can't mix <windows.h> with STL code and gcc compiler: if you ran configure, try again with configure --disable-ms-windows" +#define BOOST_REGEX_FI_WIN32_MAP +#define BOOST_REGEX_FI_POSIX_DIR +#elif (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) && !defined(BOOST_REGEX_NO_W32) +#define BOOST_REGEX_FI_WIN32_MAP +#define BOOST_REGEX_FI_WIN32_DIR +#else +#define BOOST_REGEX_FI_POSIX_MAP +#define BOOST_REGEX_FI_POSIX_DIR +#endif + +#if defined(BOOST_REGEX_FI_WIN32_MAP)||defined(BOOST_REGEX_FI_WIN32_DIR) +#include <windows.h> +#endif + +#if defined(BOOST_REGEX_FI_WIN32_DIR) + +#include <cstddef> + +namespace boost{ + namespace re_detail{ + +#ifndef BOOST_NO_ANSI_APIS +typedef WIN32_FIND_DATAA _fi_find_data; +#else +typedef WIN32_FIND_DATAW _fi_find_data; +#endif +typedef HANDLE _fi_find_handle; + + } // namespace re_detail + +} // namespace boost + +#define _fi_invalid_handle INVALID_HANDLE_VALUE +#define _fi_dir FILE_ATTRIBUTE_DIRECTORY + +#elif defined(BOOST_REGEX_FI_POSIX_DIR) + +#include <cstddef> +#include <cstdio> +#include <cctype> +#include <iterator> +#include <list> +#include <cassert> +#include <dirent.h> + +#if defined(__SUNPRO_CC) +using std::list; +#endif + +#ifndef MAX_PATH +#define MAX_PATH 256 +#endif + +namespace boost{ + namespace re_detail{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +struct _fi_find_data +{ + unsigned dwFileAttributes; + char cFileName[MAX_PATH]; +}; + +struct _fi_priv_data; + +typedef _fi_priv_data* _fi_find_handle; +#define _fi_invalid_handle 0 +#define _fi_dir 1 + +_fi_find_handle _fi_FindFirstFile(const char* lpFileName, _fi_find_data* lpFindFileData); +bool 
_fi_FindNextFile(_fi_find_handle hFindFile, _fi_find_data* lpFindFileData); +bool _fi_FindClose(_fi_find_handle hFindFile); + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + + } // namespace re_detail +} // namespace boost + +#ifdef FindFirstFile + #undef FindFirstFile +#endif +#ifdef FindNextFile + #undef FindNextFile +#endif +#ifdef FindClose + #undef FindClose +#endif + +#define FindFirstFileA _fi_FindFirstFile +#define FindNextFileA _fi_FindNextFile +#define FindClose _fi_FindClose + +#endif + +namespace boost{ + namespace re_detail{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +#ifdef BOOST_REGEX_FI_WIN32_MAP // win32 mapfile + +class BOOST_REGEX_DECL mapfile +{ + HANDLE hfile; + HANDLE hmap; + const char* _first; + const char* _last; +public: + + typedef const char* iterator; + + mapfile(){ hfile = hmap = 0; _first = _last = 0; } + mapfile(const char* file){ hfile = hmap = 0; _first = _last = 0; open(file); } + ~mapfile(){ close(); } + void open(const char* file); + void close(); + const char* begin(){ return _first; } + const char* end(){ return _last; } + size_t size(){ return _last - _first; } + bool valid(){ return (hfile != 0) && (hfile != INVALID_HANDLE_VALUE); } +}; + + +#else + +class BOOST_REGEX_DECL mapfile_iterator; + +class BOOST_REGEX_DECL mapfile +{ + typedef char* pointer; + std::FILE* hfile; + long int _size; + pointer* _first; + pointer* _last; + mutable std::list<pointer*> condemed; + enum sizes + { + buf_size = 4096 + }; + void lock(pointer* node)const; + void unlock(pointer* node)const; +public: + + typedef mapfile_iterator iterator; + + mapfile(){ hfile = 0; _size = 0; _first = _last = 0; } + mapfile(const char* file){ hfile = 0; _size = 0; _first = _last = 0; open(file); } + ~mapfile(){ close(); } + void open(const char* file); + void close(); + iterator begin()const; + iterator end()const; + unsigned long size()const{ return _size; } + bool valid()const{ return hfile != 0; } + friend class 
mapfile_iterator; +}; + +class BOOST_REGEX_DECL mapfile_iterator +#if !defined(BOOST_NO_STD_ITERATOR) || defined(BOOST_MSVC_STD_ITERATOR) +: public std::iterator<std::random_access_iterator_tag, char> +#endif +{ + typedef mapfile::pointer internal_pointer; + internal_pointer* node; + const mapfile* file; + unsigned long offset; + long position()const + { + return file ? ((node - file->_first) * mapfile::buf_size + offset) : 0; + } + void position(long pos) + { + if(file) + { + node = file->_first + (pos / mapfile::buf_size); + offset = pos % mapfile::buf_size; + } + } +public: + typedef std::ptrdiff_t difference_type; + typedef char value_type; + typedef const char* pointer; + typedef const char& reference; + typedef std::random_access_iterator_tag iterator_category; + + mapfile_iterator() { node = 0; file = 0; offset = 0; } + mapfile_iterator(const mapfile* f, long arg_position) + { + file = f; + node = f->_first + arg_position / mapfile::buf_size; + offset = arg_position % mapfile::buf_size; + if(file) + file->lock(node); + } + mapfile_iterator(const mapfile_iterator& i) + { + file = i.file; + node = i.node; + offset = i.offset; + if(file) + file->lock(node); + } + ~mapfile_iterator() + { + if(file && node) + file->unlock(node); + } + mapfile_iterator& operator = (const mapfile_iterator& i); + char operator* ()const + { + BOOST_ASSERT(node >= file->_first); + BOOST_ASSERT(node < file->_last); + return file ? 
*(*node + sizeof(int) + offset) : char(0); + } + char operator[] (long off)const + { + mapfile_iterator tmp(*this); + tmp += off; + return *tmp; + } + mapfile_iterator& operator++ (); + mapfile_iterator operator++ (int); + mapfile_iterator& operator-- (); + mapfile_iterator operator-- (int); + + mapfile_iterator& operator += (long off) + { + position(position() + off); + return *this; + } + mapfile_iterator& operator -= (long off) + { + position(position() - off); + return *this; + } + + friend inline bool operator==(const mapfile_iterator& i, const mapfile_iterator& j) + { + return (i.file == j.file) && (i.node == j.node) && (i.offset == j.offset); + } + + friend inline bool operator!=(const mapfile_iterator& i, const mapfile_iterator& j) + { + return !(i == j); + } + + friend inline bool operator<(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() < j.position(); + } + friend inline bool operator>(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() > j.position(); + } + friend inline bool operator<=(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() <= j.position(); + } + friend inline bool operator>=(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() >= j.position(); + } + + friend mapfile_iterator operator + (const mapfile_iterator& i, long off); + friend mapfile_iterator operator + (long off, const mapfile_iterator& i) + { + mapfile_iterator tmp(i); + return tmp += off; + } + friend mapfile_iterator operator - (const mapfile_iterator& i, long off); + friend inline long operator - (const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() - j.position(); + } +}; + +#endif + +// _fi_sep determines the directory separator, either '\\' or '/' +BOOST_REGEX_DECL extern const char* _fi_sep; + +struct file_iterator_ref +{ + _fi_find_handle hf; + _fi_find_data _data; + long count; +}; + + +class BOOST_REGEX_DECL file_iterator +{ + 
char* _root; + char* _path; + char* ptr; + file_iterator_ref* ref; + +public: + typedef std::ptrdiff_t difference_type; + typedef const char* value_type; + typedef const char** pointer; + typedef const char*& reference; + typedef std::input_iterator_tag iterator_category; + + file_iterator(); + file_iterator(const char* wild); + ~file_iterator(); + file_iterator(const file_iterator&); + file_iterator& operator=(const file_iterator&); + const char* root()const { return _root; } + const char* path()const { return _path; } + const char* name()const { return ptr; } + _fi_find_data* data() { return &(ref->_data); } + void next(); + file_iterator& operator++() { next(); return *this; } + file_iterator operator++(int); + const char* operator*() { return path(); } + + friend inline bool operator == (const file_iterator& f1, const file_iterator& f2) + { + return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); + } + + friend inline bool operator != (const file_iterator& f1, const file_iterator& f2) + { + return !(f1 == f2); + } + +}; + +// dwa 9/13/00 - suppress unused parameter warning +inline bool operator < (const file_iterator&, const file_iterator&) +{ + return false; +} + + +class BOOST_REGEX_DECL directory_iterator +{ + char* _root; + char* _path; + char* ptr; + file_iterator_ref* ref; + +public: + typedef std::ptrdiff_t difference_type; + typedef const char* value_type; + typedef const char** pointer; + typedef const char*& reference; + typedef std::input_iterator_tag iterator_category; + + directory_iterator(); + directory_iterator(const char* wild); + ~directory_iterator(); + directory_iterator(const directory_iterator& other); + directory_iterator& operator=(const directory_iterator& other); + + const char* root()const { return _root; } + const char* path()const { return _path; } + const char* name()const { return ptr; } + _fi_find_data* data() { return &(ref->_data); } + void next(); + directory_iterator& operator++() { next(); return 
*this; } + directory_iterator operator++(int); + const char* operator*() { return path(); } + + static const char* separator() { return _fi_sep; } + + friend inline bool operator == (const directory_iterator& f1, const directory_iterator& f2) + { + return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); + } + + + friend inline bool operator != (const directory_iterator& f1, const directory_iterator& f2) + { + return !(f1 == f2); + } + + }; + +inline bool operator < (const directory_iterator&, const directory_iterator&) +{ + return false; +} + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + + +} // namespace re_detail +using boost::re_detail::directory_iterator; +using boost::re_detail::file_iterator; +using boost::re_detail::mapfile; +} // namespace boost + +#endif // BOOST_REGEX_NO_FILEITER +#endif // BOOST_RE_FILEITER_HPP + + + + + + + + + + + + + + + + + + diff --git a/ext/boost/regex/v4/instances.hpp b/ext/boost/regex/v4/instances.hpp new file mode 100644 index 0000000000..d12dc6b270 --- /dev/null +++ b/ext/boost/regex/v4/instances.hpp @@ -0,0 +1,215 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE instances.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Defines those template instances that are placed in the + * library rather than in the users object files. 
+  */
+
+//
+// note no include guard, we may include this multiple times:
+//
+#ifndef BOOST_REGEX_NO_EXTERNAL_TEMPLATES
+
+namespace boost{
+
+//
+// this header can be included multiple times, each time with
+// a different character type, BOOST_REGEX_CHAR_T must be defined
+// first:
+//
+#ifndef BOOST_REGEX_CHAR_T
+#  error "BOOST_REGEX_CHAR_T not defined"
+#endif
+
+// BOOST_REGEX_TRAITS_T carries its own leading comma so that it can be
+// appended directly after BOOST_REGEX_CHAR_T in a template argument list.
+#ifndef BOOST_REGEX_TRAITS_T
+#  define BOOST_REGEX_TRAITS_T , boost::regex_traits<BOOST_REGEX_CHAR_T >
+#endif
+
+//
+// what follows is compiler specific:
+//
+
+// Borland C++ before 0x600: whole-class explicit instantiations.
+#if defined(__BORLANDC__) && (__BORLANDC__ < 0x600)
+
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_PREFIX
+#endif
+
+#  ifndef BOOST_REGEX_INSTANTIATE
+#     pragma option push -Jgx
+#  endif
+
+template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >;
+template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >;
+#ifndef BOOST_NO_STD_ALLOCATOR
+template class BOOST_REGEX_DECL ::boost::re_detail::perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >;
+#endif
+
+#  ifndef BOOST_REGEX_INSTANTIATE
+#     pragma option pop
+#  endif
+
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_SUFFIX
+#endif
+
+// MSVC / Intel: unless BOOST_REGEX_INSTANTIATE is defined, the keyword
+// "template" is temporarily redefined below so that every "template class"
+// line in this section reads as an "extern template" declaration.
+#elif defined(BOOST_MSVC) || defined(__ICL)
+
+#  ifndef BOOST_REGEX_INSTANTIATE
+#     ifdef __GNUC__
+#        define template __extension__ extern template
+#     else
+#        if BOOST_MSVC > 1310
+#           define BOOST_REGEX_TEMPLATE_DECL
+#        endif
+#        define template extern template
+#     endif
+#  endif
+
+#ifndef BOOST_REGEX_TEMPLATE_DECL
+#  define BOOST_REGEX_TEMPLATE_DECL BOOST_REGEX_DECL
+#endif
+
+#  ifdef BOOST_MSVC
+#     pragma warning(push)
+#     pragma warning(disable : 4251 4231 4660)
+#  endif
+
+template class BOOST_REGEX_TEMPLATE_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >;
+
+#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
+template class BOOST_REGEX_TEMPLATE_DECL match_results< const BOOST_REGEX_CHAR_T* >;
+#endif
+#ifndef BOOST_NO_STD_ALLOCATOR
+template class BOOST_REGEX_TEMPLATE_DECL ::boost::re_detail::perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >;
+#endif
+#if !(defined(BOOST_DINKUMWARE_STDLIB) && (BOOST_DINKUMWARE_STDLIB <= 1))\
+   && !(defined(BOOST_INTEL_CXX_VERSION) && (BOOST_INTEL_CXX_VERSION <= 800))\
+   && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))\
+   && !defined(BOOST_REGEX_ICU_INSTANCES)
+#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
+template class BOOST_REGEX_TEMPLATE_DECL match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >;
+#endif
+#ifndef BOOST_NO_STD_ALLOCATOR
+template class BOOST_REGEX_TEMPLATE_DECL ::boost::re_detail::perl_matcher< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >;
+#endif
+#endif
+
+
+#  ifdef BOOST_MSVC
+#     pragma warning(pop)
+#  endif
+
+// Restore the real "template" keyword and drop the section-local macro.
+#  ifdef template
+#     undef template
+#  endif
+
+#undef BOOST_REGEX_TEMPLATE_DECL
+
+// GCC 3 and later: individual member functions are listed one by one rather
+// than whole classes; "template" is again redefined to an extern declaration
+// unless BOOST_REGEX_INSTANTIATE is defined.
+#elif (defined(__GNUC__) && (__GNUC__ >= 3))
+
+#  ifndef BOOST_REGEX_INSTANTIATE
+#     define template __extension__ extern template
+#  endif
+
+#if !defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_REGEX_ICU_INSTANCES)
+namespace re_detail{
+template BOOST_REGEX_DECL
+std::locale cpp_regex_traits_base<BOOST_REGEX_CHAR_T>::imbue(const std::locale& l);
+
+template BOOST_REGEX_DECL
+cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::string_type
+   cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::transform_primary(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const;
+template BOOST_REGEX_DECL
+cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::string_type
+   cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::transform(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const;
+template BOOST_REGEX_DECL
+cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::string_type
+   cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::lookup_collatename(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const;
+template BOOST_REGEX_DECL
+void cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::init();
+template BOOST_REGEX_DECL
+cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::char_class_type
+   cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::lookup_classname_imp(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const;
+#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
+template BOOST_REGEX_DECL
+bool cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::isctype(const BOOST_REGEX_CHAR_T c, char_class_type mask) const;
+#endif
+} // namespace
+template BOOST_REGEX_DECL
+int cpp_regex_traits<BOOST_REGEX_CHAR_T>::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const;
+template BOOST_REGEX_DECL
+std::string cpp_regex_traits<BOOST_REGEX_CHAR_T>::catalog_name(const std::string& name);
+template BOOST_REGEX_DECL
+std::string& cpp_regex_traits<BOOST_REGEX_CHAR_T>::get_catalog_name_inst();
+template BOOST_REGEX_DECL
+std::string cpp_regex_traits<BOOST_REGEX_CHAR_T>::get_catalog_name();
+#ifdef BOOST_HAS_THREADS
+template BOOST_REGEX_DECL
+static_mutex& cpp_regex_traits<BOOST_REGEX_CHAR_T>::get_mutex_inst();
+#endif
+#endif
+
+template BOOST_REGEX_DECL basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >&
+   basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >::do_assign(
+      const BOOST_REGEX_CHAR_T* p1,
+      const BOOST_REGEX_CHAR_T* p2,
+      flag_type f);
+template BOOST_REGEX_DECL basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >::locale_type BOOST_REGEX_CALL
+   basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >::imbue(locale_type l);
+
+template BOOST_REGEX_DECL void BOOST_REGEX_CALL
+   match_results<const BOOST_REGEX_CHAR_T*>::maybe_assign(
+      const match_results<const BOOST_REGEX_CHAR_T*>& m);
+
+namespace re_detail{
+template BOOST_REGEX_DECL void perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::construct_init(
+      const basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >& e, match_flag_type f);
+template BOOST_REGEX_DECL bool perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::match();
+template BOOST_REGEX_DECL bool perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::find();
+} // namespace
+
+#if (defined(__GLIBCPP__) || defined(__GLIBCXX__)) \
+   && !defined(BOOST_REGEX_ICU_INSTANCES)\
+   && !defined(__SGI_STL_PORT)\
+   && !defined(_STLPORT_VERSION)
+// std::basic_string<>::const_iterator instances as well:
+template BOOST_REGEX_DECL void BOOST_REGEX_CALL
+   match_results<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator>::maybe_assign(
+      const match_results<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator>& m);
+
+namespace re_detail{
+template BOOST_REGEX_DECL void perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::construct_init(
+      const basic_regex<BOOST_REGEX_CHAR_T>& e, match_flag_type f);
+template BOOST_REGEX_DECL bool perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::match();
+template BOOST_REGEX_DECL bool perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::find();
+} // namespace
+#endif
+
+#  ifdef template
+#     undef template
+#  endif
+
+
+#endif
+
+} // namespace boost
+
+#endif // BOOST_REGEX_NO_EXTERNAL_TEMPLATES
+
+
+
+
+
diff --git a/ext/boost/regex/v4/iterator_category.hpp
b/ext/boost/regex/v4/iterator_category.hpp
new file mode 100644
index 0000000000..9e40142378
--- /dev/null
+++ b/ext/boost/regex/v4/iterator_category.hpp
@@ -0,0 +1,91 @@
+/*
+ *
+ * Copyright (c) 2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         iterator_category.hpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Iterator traits for selecting an iterator type as
+  *                an integral constant expression.
+  */
+
+
+#ifndef BOOST_REGEX_ITERATOR_CATEGORY_HPP
+#define BOOST_REGEX_ITERATOR_CATEGORY_HPP
+
+#include <iterator>
+#include <boost/type_traits/is_convertible.hpp>
+#include <boost/type_traits/is_pointer.hpp>
+
+namespace boost{
+namespace detail{
+
+// Non-pointer case: value is true when the iterator's category tag converts
+// to std::random_access_iterator_tag.  Without std::iterator_traits support
+// (BOOST_NO_STD_ITERATOR_TRAITS) the answer is always false.
+template <class I>
+struct is_random_imp
+{
+#ifndef BOOST_NO_STD_ITERATOR_TRAITS
+private:
+   typedef typename std::iterator_traits<I>::iterator_category cat;
+public:
+   BOOST_STATIC_CONSTANT(bool, value = (::boost::is_convertible<cat*, std::random_access_iterator_tag*>::value));
+#else
+   BOOST_STATIC_CONSTANT(bool, value = false);
+#endif
+};
+
+// Pointer case: the answer is unconditionally true.
+template <class I>
+struct is_random_pointer_imp
+{
+   BOOST_STATIC_CONSTANT(bool, value = true);
+};
+
+// Selects one of the two implementations above via a nested "rebind"
+// template; the primary template handles the non-pointer case.
+template <bool is_pointer_type>
+struct is_random_imp_selector
+{
+   template <class I>
+   struct rebind
+   {
+      typedef is_random_imp<I> type;
+   };
+};
+
+// Specialisation chosen when the iterator type is a pointer.
+template <>
+struct is_random_imp_selector<true>
+{
+   template <class I>
+   struct rebind
+   {
+      typedef is_random_pointer_imp<I> type;
+   };
+};
+
+}
+
+// Public trait: "value" is a compile-time bool computed by dispatching on
+// ::boost::is_pointer<I>::value to the detail implementations above.
+template <class I>
+struct is_random_access_iterator
+{
+private:
+   typedef detail::is_random_imp_selector< ::boost::is_pointer<I>::value> selector;
+   typedef typename selector::template rebind<I> bound_type;
+   typedef typename bound_type::type answer;
+public:
+   BOOST_STATIC_CONSTANT(bool, value = answer::value);
+};
+
+#ifndef
BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+template <class I>
+const bool is_random_access_iterator<I>::value;
+#endif
+
+}
+
+#endif
+
+
diff --git a/ext/boost/regex/v4/iterator_traits.hpp b/ext/boost/regex/v4/iterator_traits.hpp
new file mode 100644
index 0000000000..f7afacb17b
--- /dev/null
+++ b/ext/boost/regex/v4/iterator_traits.hpp
@@ -0,0 +1,135 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         iterator_traits.hpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Declares iterator traits workarounds.
+  */
+
+#ifndef BOOST_REGEX_V4_ITERATOR_TRAITS_HPP
+#define BOOST_REGEX_V4_ITERATOR_TRAITS_HPP
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_PREFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+namespace boost{
+namespace re_detail{
+
+// Fallback path: used when std::iterator_traits or partial specialisation is
+// unavailable.  Otherwise (see the #else branch below) regex_iterator_traits
+// simply derives from std::iterator_traits.
+#if defined(BOOST_NO_STD_ITERATOR_TRAITS) || defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+
+// Primary template: reads the nested typedefs of a class-type iterator.
+template <class T>
+struct regex_iterator_traits 
+{
+  typedef typename T::iterator_category iterator_category;
+  typedef typename T::value_type        value_type;
+#if !defined(BOOST_NO_STD_ITERATOR)
+  typedef typename T::difference_type   difference_type;
+  typedef typename T::pointer           pointer;
+  typedef typename T::reference         reference;
+#else
+  typedef std::ptrdiff_t                difference_type;
+  typedef value_type*                   pointer;
+  typedef value_type&                   reference;
+#endif
+};
+
+// Traits for a mutable pointer T*.
+template <class T>
+struct pointer_iterator_traits
+{
+   typedef std::ptrdiff_t difference_type;
+   typedef T value_type;
+   typedef T* pointer;
+   typedef T& reference;
+   typedef std::random_access_iterator_tag iterator_category;
+};
+// Traits for a pointer-to-const, const T*.
+template <class T>
+struct const_pointer_iterator_traits
+{
+   typedef std::ptrdiff_t difference_type;
+   typedef T value_type;
+   typedef const T* pointer;
+   typedef const T& reference;
+   typedef std::random_access_iterator_tag iterator_category;
+};
+
+template<>
+struct regex_iterator_traits<char*> : pointer_iterator_traits<char>{};
+template<>
+struct regex_iterator_traits<const char*> : const_pointer_iterator_traits<char>{};
+template<>
+struct regex_iterator_traits<wchar_t*> : pointer_iterator_traits<wchar_t>{};
+template<>
+struct regex_iterator_traits<const wchar_t*> : const_pointer_iterator_traits<wchar_t>{};
+//
+// the following are needed for ICU support:
+//
+// NOTE(review): the two unsigned char specialisations below inherit the
+// traits of plain char, so value_type/pointer/reference are char-based
+// rather than unsigned char-based -- confirm this is intended.
+template<>
+struct regex_iterator_traits<unsigned char*> : pointer_iterator_traits<char>{};
+template<>
+struct regex_iterator_traits<const unsigned char*> : const_pointer_iterator_traits<char>{};
+template<>
+struct regex_iterator_traits<int*> : pointer_iterator_traits<int>{};
+template<>
+struct regex_iterator_traits<const int*> : const_pointer_iterator_traits<int>{};
+
+#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
+template<>
+struct regex_iterator_traits<unsigned short*> : pointer_iterator_traits<unsigned short>{};
+template<>
+struct regex_iterator_traits<const unsigned short*> : const_pointer_iterator_traits<unsigned short>{};
+#endif
+
+#if defined(__SGI_STL_PORT) && defined(__STL_DEBUG)
+template<>
+struct regex_iterator_traits<std::string::iterator> : pointer_iterator_traits<char>{};
+template<>
+struct regex_iterator_traits<std::string::const_iterator> : const_pointer_iterator_traits<char>{};
+#ifndef BOOST_NO_STD_WSTRING
+template<>
+struct regex_iterator_traits<std::wstring::iterator> : pointer_iterator_traits<wchar_t>{};
+template<>
+struct regex_iterator_traits<std::wstring::const_iterator> : const_pointer_iterator_traits<wchar_t>{};
+#endif // BOOST_NO_STD_WSTRING
+#endif // __SGI_STL_PORT && __STL_DEBUG
+
+#else
+
+template <class T>
+struct regex_iterator_traits : public std::iterator_traits<T> {};
+
+#endif
+
+} // namespace re_detail
+} // namespace boost
+
+#ifdef BOOST_MSVC
+#pragma
warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_SUFFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+
diff --git a/ext/boost/regex/v4/match_flags.hpp b/ext/boost/regex/v4/match_flags.hpp
new file mode 100644
index 0000000000..9585aca8b2
--- /dev/null
+++ b/ext/boost/regex/v4/match_flags.hpp
@@ -0,0 +1,138 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         match_flags.hpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Declares match_flags type.
+  */
+
+#ifndef BOOST_REGEX_V4_MATCH_FLAGS
+#define BOOST_REGEX_V4_MATCH_FLAGS
+
+#ifdef __cplusplus
+#  include <boost/cstdint.hpp>
+#endif
+
+#ifdef __cplusplus
+namespace boost{
+   namespace regex_constants{
+#endif
+
+// Bit flags: each match_* enumerator after match_not_bol is the previous one
+// shifted left by one bit, so the order of the enumerators fixes their values.
+// The format_* flags continue the same sequence above match_max.
+typedef enum _match_flags
+{
+   match_default = 0,
+   match_not_bol = 1,                                // first is not start of line
+   match_not_eol = match_not_bol << 1,               // last is not end of line
+   match_not_bob = match_not_eol << 1,               // first is not start of buffer
+   match_not_eob = match_not_bob << 1,               // last is not end of buffer
+   match_not_bow = match_not_eob << 1,               // first is not start of word
+   match_not_eow = match_not_bow << 1,               // last is not end of word
+   match_not_dot_newline = match_not_eow << 1,       // \n is not matched by '.'
+   match_not_dot_null = match_not_dot_newline << 1,  // '\0' is not matched by '.'
+   match_prev_avail = match_not_dot_null << 1,       // *--first is a valid expression
+   match_init = match_prev_avail << 1,               // internal use
+   match_any = match_init << 1,                      // don't care what we match
+   match_not_null = match_any << 1,                  // string can't be null
+   match_continuous = match_not_null << 1,           // each grep match must continue
+                                                     // uninterrupted from the previous one
+   match_partial = match_continuous << 1,            // find partial matches
+   
+   match_stop = match_partial << 1,                  // stop after first match (grep) V3 only
+   match_not_initial_null = match_stop,              // don't match initial null, V4 only (same bit as match_stop)
+   match_all = match_stop << 1,                      // must find the whole of input even if match_any is set
+   match_perl = match_all << 1,                      // Use perl matching rules
+   match_posix = match_perl << 1,                    // Use POSIX matching rules
+   match_nosubs = match_posix << 1,                  // don't trap marked subs
+   match_extra = match_nosubs << 1,                  // include full capture information for repeated captures
+   match_single_line = match_extra << 1,             // treat text as single line and ignore any \n's when matching ^ and $.
+   match_unused1 = match_single_line << 1,           // unused
+   match_unused2 = match_unused1 << 1,               // unused
+   match_unused3 = match_unused2 << 1,               // unused
+   match_max = match_unused3,
+
+   format_perl = 0,                                  // perl style replacement
+   format_default = 0,                               // ditto.
+   format_sed = match_max << 1,                      // sed style replacement.
+   format_all = format_sed << 1,                     // enable all extensions to syntax.
+   format_no_copy = format_all << 1,                 // don't copy non-matching segments.
+   format_first_only = format_no_copy << 1,          // Only replace first occurrence.
+   format_is_if = format_first_only << 1,            // internal use only.
+   format_literal = format_is_if << 1                // treat string as a literal
+
+} match_flags;
+
+// On old MSVC (before 1300) and Borland the flag type is a plain unsigned
+// long; elsewhere it is the enum itself and the bitwise operators below are
+// provided for it.
+#if (defined(_MSC_VER) && (_MSC_VER < 1300)) || defined(__BORLANDC__)
+typedef unsigned long match_flag_type;
+#else
+typedef match_flags match_flag_type;
+
+
+#ifdef __cplusplus
+// Bitwise operators on match_flags: each converts its operands to
+// boost::int32_t, applies the operation and casts the result back.
+inline match_flags operator&(match_flags m1, match_flags m2)
+{ return static_cast<match_flags>(static_cast<boost::int32_t>(m1) & static_cast<boost::int32_t>(m2)); }
+inline match_flags operator|(match_flags m1, match_flags m2)
+{ return static_cast<match_flags>(static_cast<boost::int32_t>(m1) | static_cast<boost::int32_t>(m2)); }
+inline match_flags operator^(match_flags m1, match_flags m2)
+{ return static_cast<match_flags>(static_cast<boost::int32_t>(m1) ^ static_cast<boost::int32_t>(m2)); }
+inline match_flags operator~(match_flags m1)
+{ return static_cast<match_flags>(~static_cast<boost::int32_t>(m1)); }
+inline match_flags& operator&=(match_flags& m1, match_flags m2)
+{ m1 = m1&m2; return m1; }
+inline match_flags& operator|=(match_flags& m1, match_flags m2)
+{ m1 = m1|m2; return m1; }
+inline match_flags& operator^=(match_flags& m1, match_flags m2)
+{ m1 = m1^m2; return m1; }
+#endif
+#endif
+
+#ifdef __cplusplus
+} // namespace regex_constants
+//
+// import names into boost for backwards compatibility:
+//
+using regex_constants::match_flag_type;
+using regex_constants::match_default;
+using regex_constants::match_not_bol;
+using regex_constants::match_not_eol;
+using regex_constants::match_not_bob;
+using regex_constants::match_not_eob;
+using regex_constants::match_not_bow;
+using regex_constants::match_not_eow;
+using regex_constants::match_not_dot_newline;
+using regex_constants::match_not_dot_null;
+using regex_constants::match_prev_avail;
+//using regex_constants::match_init;
+using regex_constants::match_any;
+using regex_constants::match_not_null;
+using regex_constants::match_continuous;
+using regex_constants::match_partial;
+//using regex_constants::match_stop;
+using regex_constants::match_all;
+using regex_constants::match_perl;
+using regex_constants::match_posix;
+using regex_constants::match_nosubs;
+using regex_constants::match_extra;
+using regex_constants::match_single_line;
+//using regex_constants::match_max;
+using regex_constants::format_all;
+using regex_constants::format_sed;
+using regex_constants::format_perl;
+using regex_constants::format_default;
+using regex_constants::format_no_copy;
+using regex_constants::format_first_only;
+//using regex_constants::format_is_if;
+
+} // namespace boost
+#endif // __cplusplus
+#endif // include guard
+
+
diff --git a/ext/boost/regex/v4/match_results.hpp b/ext/boost/regex/v4/match_results.hpp
new file mode 100644
index 0000000000..09dd31f009
--- /dev/null
+++ b/ext/boost/regex/v4/match_results.hpp
@@ -0,0 +1,579 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         match_results.hpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Declares template class match_results.
+  */
+
+#ifndef BOOST_REGEX_V4_MATCH_RESULTS_HPP
+#define BOOST_REGEX_V4_MATCH_RESULTS_HPP
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_PREFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+namespace boost{
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable : 4251 4231 4660)
+#endif
+
+namespace re_detail{
+
+template <class charT>
+class named_subexpressions;
+
+}
+
+//
+// match_results:
+// holds the sub-expression matches produced by a regex search.  The
+// sub_match objects live in m_subs; alongside them the class stores the
+// search base iterator, a "null" sub_match returned for out-of-range
+// look-ups, a shared table of named sub-expressions and the index of the
+// last closed parenthesis.
+//
+template <class BidiIterator, class Allocator>
+class match_results
+{ 
+private:
+#ifndef BOOST_NO_STD_ALLOCATOR
+   typedef          std::vector<sub_match<BidiIterator>, Allocator> vector_type;
+#else
+   typedef          std::vector<sub_match<BidiIterator> >           vector_type;
+#endif
+public: 
+   typedef          sub_match<BidiIterator>                         value_type;
+#if  !defined(BOOST_NO_STD_ALLOCATOR) && !(defined(BOOST_MSVC) && defined(_STLPORT_VERSION))
+   typedef typename Allocator::const_reference                              const_reference;
+#else
+   typedef          const value_type&                                       const_reference;
+#endif
+   typedef          const_reference                                         reference;
+   typedef typename vector_type::const_iterator                             const_iterator;
+   typedef          const_iterator                                          iterator;
+   typedef typename re_detail::regex_iterator_traits<
+                                    BidiIterator>::difference_type          difference_type;
+   typedef typename Allocator::size_type                                    size_type;
+   typedef          Allocator                                               allocator_type;
+   typedef typename re_detail::regex_iterator_traits<
+                                    BidiIterator>::value_type               char_type;
+   typedef          std::basic_string<char_type>                            string_type;
+   typedef          re_detail::named_subexpressions_base<char_type>         named_sub_type;
+
+   // construct/copy/destroy:
+   explicit match_results(const Allocator& a = Allocator())
+#ifndef BOOST_NO_STD_ALLOCATOR
+      : m_subs(a), m_base(), m_last_closed_paren(0) {}
+#else
+      : m_subs(), m_base(), m_last_closed_paren(0) { (void)a; }
+#endif
+   // Copy every data member.  Copying only m_subs and m_base would leave
+   // m_last_closed_paren uninitialised (it is read by get_last_closed_paren)
+   // and would drop the named sub-expression table, so that named look-ups
+   // on the copy dereference an empty shared_ptr.
+   match_results(const match_results& m)
+      : m_subs(m.m_subs), m_base(m.m_base), m_null(m.m_null),
+        m_named_subs(m.m_named_subs), m_last_closed_paren(m.m_last_closed_paren) {}
+   // Assignment mirrors the copy constructor: all state is transferred.
+   match_results& operator=(const match_results& m)
+   {
+      m_subs = m.m_subs;
+      m_base = m.m_base;
+      m_null = m.m_null;
+      m_named_subs = m.m_named_subs;
+      m_last_closed_paren = m.m_last_closed_paren;
+      return
*this;
+   }
+   ~match_results(){}
+
+   // size:
+   // size() does not count the first two entries of m_subs; it is 0 while
+   // m_subs holds fewer than two entries.
+   size_type size() const
+   { return empty() ? 0 : m_subs.size() - 2; }
+   size_type max_size() const
+   { return m_subs.max_size(); }
+   bool empty() const
+   { return m_subs.size() < 2; }
+   // element access:
+   // length(): length of sub-expression "sub" (stored at m_subs[sub + 2]),
+   // or 0 when the index is out of range.
+   difference_type length(int sub = 0) const
+   {
+      sub += 2;
+      if((sub < (int)m_subs.size()) && (sub > 0))
+         return m_subs[sub].length();
+      return 0;
+   }
+   // Named overloads: resolve the name to an index first.
+   difference_type length(const char_type* sub) const
+   {
+      const char_type* end = sub;
+      while(*end) ++end;
+      return length(named_subexpression_index(sub, end));
+   }
+   template <class charT>
+   difference_type length(const charT* sub) const
+   {
+      const charT* end = sub;
+      while(*end) ++end;
+      return length(named_subexpression_index(sub, end));
+   }
+   template <class charT, class Traits, class A>
+   difference_type length(const std::basic_string<charT, Traits, A>& sub) const
+   {
+      return length(sub.c_str());
+   }
+   // position(): distance from the search base to the start of the
+   // sub-expression.  Returns ~0 when the index is out of range, or when a
+   // sub-expression other than number 0 did not participate in the match.
+   difference_type position(size_type sub = 0) const
+   {
+      sub += 2;
+      if(sub < m_subs.size())
+      {
+         const sub_match<BidiIterator>& s = m_subs[sub];
+         if(s.matched || (sub == 2))
+         {
+            return ::boost::re_detail::distance((BidiIterator)(m_base), (BidiIterator)(s.first));
+         }
+      }
+      return ~static_cast<difference_type>(0);
+   }
+   difference_type position(const char_type* sub) const
+   {
+      const char_type* end = sub;
+      while(*end) ++end;
+      return position(named_subexpression_index(sub, end));
+   }
+   template <class charT>
+   difference_type position(const charT* sub) const
+   {
+      const charT* end = sub;
+      while(*end) ++end;
+      return position(named_subexpression_index(sub, end));
+   }
+   template <class charT, class Traits, class A>
+   difference_type position(const std::basic_string<charT, Traits, A>& sub) const
+   {
+      return position(sub.c_str());
+   }
+   // str(): text of sub-expression "sub"; an empty string when the index is
+   // out of range or the sub-expression did not match.
+   string_type str(int sub = 0) const
+   {
+      sub += 2;
+      string_type result;
+      if(sub < (int)m_subs.size() && (sub > 0))
+      {
+         const sub_match<BidiIterator>& s = m_subs[sub];
+         if(s.matched)
+         {
+            result = s.str();
+         }
+      }
+      return
result;
+   }
+   // Named overloads of str(): look the sub-expression up by name.
+   string_type str(const char_type* sub) const
+   {
+      return (*this)[sub].str();
+   }
+   template <class Traits, class A>
+   string_type str(const std::basic_string<char_type, Traits, A>& sub) const
+   {
+      return (*this)[sub].str();
+   }
+   template <class charT>
+   string_type str(const charT* sub) const
+   {
+      return (*this)[sub].str();
+   }
+   template <class charT, class Traits, class A>
+   string_type str(const std::basic_string<charT, Traits, A>& sub) const
+   {
+      return (*this)[sub].str();
+   }
+   // Indexed access: sub-expression "sub" is stored at m_subs[sub + 2];
+   // out-of-range indices yield the null sub_match.
+   const_reference operator[](int sub) const
+   {
+      sub += 2;
+      if(sub < (int)m_subs.size() && (sub >= 0))
+      {
+         return m_subs[sub];
+      }
+      return m_null;
+   }
+   //
+   // Named sub-expressions:
+   //
+   // Look up the sub-expression named by [i, j).  When no name table has
+   // been attached (m_named_subs is empty, e.g. on a default-constructed
+   // object) the name is treated as unknown instead of dereferencing an
+   // empty shared_ptr.
+   const_reference named_subexpression(const char_type* i, const char_type* j) const
+   {
+      int index = m_named_subs.get() ? m_named_subs->get_id(i, j) : -1;
+      return index > 0 ? (*this)[index] : m_null;
+   }
+   // Overload for a narrower character type: the name is widened into a
+   // temporary buffer of char_type before the look-up.
+   template <class charT>
+   const_reference named_subexpression(const charT* i, const charT* j) const
+   {
+      BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
+      if(i == j)
+         return m_null;
+      std::vector<char_type> s;
+      while(i != j)
+         s.insert(s.end(), *i++);
+      return named_subexpression(&*s.begin(), &*s.begin() + s.size());
+   }
+   // Index of the sub-expression named by [i, j), or -20 when the name is
+   // unknown or no name table has been attached.
+   int named_subexpression_index(const char_type* i, const char_type* j) const
+   {
+      int index = m_named_subs.get() ? m_named_subs->get_id(i, j) : -1;
+      return index > 0 ?
index : -20;
+   }
+   // Overload for a narrower character type: widens the name into a
+   // temporary char_type buffer, then defers to the overload above.
+   template <class charT>
+   int named_subexpression_index(const charT* i, const charT* j) const
+   {
+      BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
+      if(i == j)
+         return -20;
+      std::vector<char_type> s;
+      while(i != j)
+         s.insert(s.end(), *i++);
+      return named_subexpression_index(&*s.begin(), &*s.begin() + s.size());
+   }
+   // operator[] by name: the overloads below all forward to
+   // named_subexpression(); an empty name yields the null sub_match in the
+   // overloads that widen the characters.
+   template <class Traits, class A>
+   const_reference operator[](const std::basic_string<char_type, Traits, A>& s) const
+   {
+      return named_subexpression(s.c_str(), s.c_str() + s.size());
+   }
+   const_reference operator[](const char_type* p) const
+   {
+      const char_type* e = p;
+      while(*e) ++e;
+      return named_subexpression(p, e);
+   }
+
+   template <class charT>
+   const_reference operator[](const charT* p) const
+   {
+      BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
+      if(*p == 0)
+         return m_null;
+      std::vector<char_type> s;
+      while(*p)
+         s.insert(s.end(), *p++);
+      return named_subexpression(&*s.begin(), &*s.begin() + s.size());
+   }
+   template <class charT, class Traits, class A>
+   const_reference operator[](const std::basic_string<charT, Traits, A>& ns) const
+   {
+      BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type));
+      if(ns.empty())
+         return m_null;
+      std::vector<char_type> s;
+      for(unsigned i = 0; i < ns.size(); ++i)
+         s.insert(s.end(), ns[i]);
+      return named_subexpression(&*s.begin(), &*s.begin() + s.size());
+   }
+
+   // prefix() and suffix() are the entries at index -1 and -2 of the
+   // indexed operator[].
+   const_reference prefix() const
+   {
+      return (*this)[-1];
+   }
+
+   const_reference suffix() const
+   {
+      return (*this)[-2];
+   }
+   // begin() skips the first two entries of m_subs; with two or fewer
+   // entries the range is empty.
+   const_iterator begin() const
+   {
+      return (m_subs.size() > 2) ? (m_subs.begin() + 2) : m_subs.end();
+   }
+   const_iterator end() const
+   {
+      return m_subs.end();
+   }
+   // format:
+   // The two overloads without a regex argument use trivial_format_traits.
+   template <class OutputIterator>
+   OutputIterator format(OutputIterator out,
+                         const string_type& fmt,
+                         match_flag_type flags = format_default) const
+   {
+      re_detail::trivial_format_traits<char_type> traits;
+      return re_detail::regex_format_imp(out, *this, fmt.data(), fmt.data() + fmt.size(), flags, traits);
+   }
+   string_type format(const string_type& fmt,
+                      match_flag_type flags = format_default) const
+   {
+      string_type result;
+      re_detail::string_out_iterator<string_type> i(result);
+      re_detail::trivial_format_traits<char_type> traits;
+      re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, traits);
+      return result;
+   }
+   // format with locale:
+   // These overloads take the traits from the supplied regex object.
+   template <class OutputIterator, class RegexT>
+   OutputIterator format(OutputIterator out,
+                         const string_type& fmt,
+                         match_flag_type flags,
+                         const RegexT& re) const
+   {
+      return ::boost::re_detail::regex_format_imp(out, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits());
+   }
+   template <class RegexT>
+   string_type format(const string_type& fmt,
+                      match_flag_type flags,
+                      const RegexT& re) const
+   {
+      string_type result;
+      re_detail::string_out_iterator<string_type> i(result);
+      ::boost::re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits());
+      return result;
+   }
+   // Sub-expression recorded in m_last_closed_paren, or the null sub_match
+   // when that index is 0.
+   const_reference get_last_closed_paren()const
+   {
+      return m_last_closed_paren == 0 ?
m_null : (*this)[m_last_closed_paren];
+   }
+
+   allocator_type get_allocator() const
+   {
+#ifndef BOOST_NO_STD_ALLOCATOR
+      return m_subs.get_allocator();
+#else
+     return allocator_type();
+#endif
+   }
+   // Exchange the complete state of two objects.  Every data member is
+   // swapped: leaving out m_null, m_named_subs or m_last_closed_paren would
+   // pair each object's sub-matches with the other's name table and
+   // last-closed-paren index.
+   void swap(match_results& that)
+   {
+      std::swap(m_subs, that.m_subs);
+      std::swap(m_base, that.m_base);
+      std::swap(m_null, that.m_null);
+      std::swap(m_named_subs, that.m_named_subs);
+      std::swap(m_last_closed_paren, that.m_last_closed_paren);
+   }
+   // Equality compares the stored sub-matches and the search base only.
+   bool operator==(const match_results& that)const
+   {
+      return (m_subs == that.m_subs) && (m_base == that.m_base);
+   }
+   bool operator!=(const match_results& that)const
+   { return !(*this == that); }
+
+#ifdef BOOST_REGEX_MATCH_EXTRA
+   typedef typename sub_match<BidiIterator>::capture_sequence_type capture_sequence_type;
+
+   const capture_sequence_type& captures(int i)const
+   {
+      return (*this)[i].captures();
+   }
+#endif
+
+   //
+   // private access functions:
+   // set_second(i): marks the end of the whole match (m_subs[2]) at i, makes
+   // m_subs[0] start there and resets the null sub_match to the empty range
+   // [i, i).
+   void BOOST_REGEX_CALL set_second(BidiIterator i)
+   {
+      BOOST_ASSERT(m_subs.size() > 2);
+      m_subs[2].second = i;
+      m_subs[2].matched = true;
+      m_subs[0].first = i;
+      m_subs[0].matched = (m_subs[0].first != m_subs[0].second);
+      m_null.first = i;
+      m_null.second = i;
+      m_null.matched = false;
+   }
+
+   // set_second(i, pos, m, escape_k): sets the end of sub-expression "pos"
+   // and its matched flag; a non-zero pos is also recorded as the last
+   // closed parenthesis.  For pos == 0 (unless escape_k is set) it performs
+   // the same m_subs[0] / m_null update as the one-argument overload.
+   void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false)
+   {
+      if(pos)
+         m_last_closed_paren = pos;
+      pos += 2;
+      BOOST_ASSERT(m_subs.size() > pos);
+      m_subs[pos].second = i;
+      m_subs[pos].matched = m;
+      if((pos == 2) && !escape_k)
+      {
+         m_subs[0].first = i;
+         m_subs[0].matched = (m_subs[0].first != m_subs[0].second);
+         m_null.first = i;
+         m_null.second = i;
+         m_null.matched = false;
+      }
+   }
+   // set_size(n, i, j): resizes m_subs to n + 2 entries, fills every entry
+   // with a sub_match constructed from j, sets m_subs[1].first to i and
+   // clears the last-closed-paren index.
+   void BOOST_REGEX_CALL set_size(size_type n, BidiIterator i, BidiIterator j)
+   {
+      value_type v(j);
+      size_type len = m_subs.size();
+      if(len > n + 2)
+      {
+         m_subs.erase(m_subs.begin()+n+2, m_subs.end());
+         std::fill(m_subs.begin(), m_subs.end(), v);
+      }
+      else
+      {
+         std::fill(m_subs.begin(), m_subs.end(), v);
+         if(n+2 != len)
+            m_subs.insert(m_subs.end(), n+2-len, v);
+      }
+      m_subs[1].first = i;
+      m_last_closed_paren = 0;
+   }
+   void BOOST_REGEX_CALL set_base(BidiIterator pos)
+   {
+
m_base = pos;
+   }
+   BidiIterator base()const
+   {
+      return m_base;
+   }
+   // set_first(i): marks i as both the end of m_subs[1] and the start of
+   // the whole match (m_subs[2]), then resets every later sub-expression to
+   // an unmatched empty range at m_subs[0].second.
+   void BOOST_REGEX_CALL set_first(BidiIterator i)
+   {
+      // set up prefix:
+      m_subs[1].second = i;
+      m_subs[1].matched = (m_subs[1].first != i);
+      // set up $0:
+      m_subs[2].first = i;
+      // zero out everything else:
+      for(size_type n = 3; n < m_subs.size(); ++n)
+      {
+         m_subs[n].first = m_subs[n].second = m_subs[0].second;
+         m_subs[n].matched = false;
+      }
+   }
+   // set_first(i, pos, escape_k): sets the start of sub-expression "pos".
+   // With escape_k the end of m_subs[1] is moved to i as well; with pos == 0
+   // and no escape_k it defers to the one-argument overload.
+   void BOOST_REGEX_CALL set_first(BidiIterator i, size_type pos, bool escape_k = false)
+   {
+      BOOST_ASSERT(pos+2 < m_subs.size());
+      if(pos || escape_k)
+      {
+         m_subs[pos+2].first = i;
+         if(escape_k)
+         {
+            m_subs[1].second = i;
+            m_subs[1].matched = (m_subs[1].first != m_subs[1].second);
+         }
+      }
+      else
+         set_first(i);
+   }
+   void BOOST_REGEX_CALL maybe_assign(const match_results<BidiIterator, Allocator>& m);
+
+   void BOOST_REGEX_CALL set_named_subs(boost::shared_ptr<named_sub_type> subs)
+   {
+      m_named_subs = subs;
+   }
+
+private:
+   vector_type            m_subs;                      // subexpressions
+   BidiIterator   m_base;                              // where the search started from
+   sub_match<BidiIterator> m_null;                     // a null match
+   boost::shared_ptr<named_sub_type> m_named_subs;     // table of named sub-expressions
+   int m_last_closed_paren;                            // index of the last closed parenthesis, 0 if none
+};
+
+// maybe_assign: replaces *this with m when m is the better candidate.  The
+// sub-expressions are compared in order: an earlier start position wins,
+// then a longer match, then a matched sub-expression over an unmatched one.
+template <class BidiIterator, class Allocator>
+void BOOST_REGEX_CALL match_results<BidiIterator, Allocator>::maybe_assign(const match_results<BidiIterator, Allocator>& m)
+{
+   const_iterator p1, p2;
+   p1 = begin();
+   p2 = m.begin();
+   //
+   // Distances are measured from the start of *this* match, unless this isn't
+   // a valid match in which case we use the start of the whole sequence.  Note that
+   // no subsequent match-candidate can ever be to the left of the first match found.
+   // This ensures that when we are using bidirectional iterators, that distances 
+   // measured are as short as possible, and therefore as efficient as possible
+   // to compute.  Finally note that we don't use the "matched" data member to test
+   // whether a sub-expression is a valid match, because partial matches set this
+   // to false for sub-expression 0.
+   //
+   BidiIterator l_end = this->suffix().second;
+   BidiIterator l_base = (p1->first == l_end) ? this->prefix().first : (*this)[0].first;
+   difference_type len1 = 0;
+   difference_type len2 = 0;
+   difference_type base1 = 0;
+   difference_type base2 = 0;
+   std::size_t i;
+   for(i = 0; i < size(); ++i, ++p1, ++p2)
+   {
+      //
+      // Leftmost takes priority over longest; handle special cases
+      // where distances need not be computed first (an optimisation
+      // for bidirectional iterators: ensure that we don't accidentally
+      // compute the length of the whole sequence, as this can be really
+      // expensive).
+      //
+      if(p1->first == l_end)
+      {
+         if(p2->first != l_end)
+         {
+            // p2 must be better than p1, and no need to calculate
+            // actual distances:
+            base1 = 1;
+            base2 = 0;
+            break;
+         }
+         else
+         {
+            // *p1 and *p2 are either unmatched or match end-of sequence,
+            // either way no need to calculate distances:
+            if((p1->matched == false) && (p2->matched == true))
+               break;
+            if((p1->matched == true) && (p2->matched == false))
+               return;
+            continue;
+         }
+      }
+      else if(p2->first == l_end)
+      {
+         // p1 better than p2, and no need to calculate distances:
+         return;
+      }
+      base1 = ::boost::re_detail::distance(l_base, p1->first);
+      base2 = ::boost::re_detail::distance(l_base, p2->first);
+      BOOST_ASSERT(base1 >= 0);
+      BOOST_ASSERT(base2 >= 0);
+      if(base1 < base2) return;
+      if(base2 < base1) break;
+
+      len1 = ::boost::re_detail::distance((BidiIterator)p1->first, (BidiIterator)p1->second);
+      len2 = ::boost::re_detail::distance((BidiIterator)p2->first, (BidiIterator)p2->second);
+      BOOST_ASSERT(len1 >= 0);
+      BOOST_ASSERT(len2 >= 0);
+      if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
+         break;
+      if((p1->matched == true) && (p2->matched == false))
+         return;
+   }
+   // Every sub-expression compared equal: keep the current result.
+   if(i == size())
+      return;
+   if(base2 < base1)
+      *this = m;
+   else if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) )
+      *this = m;
+}
+
+// Non-member swap: forwards to the member function.
+template <class BidiIterator, class Allocator>
+void swap(match_results<BidiIterator, Allocator>& a, match_results<BidiIterator, Allocator>& b)
+{
+   a.swap(b);
+}
+
+// Stream insertion writes s.str(), i.e. the text of sub-expression 0.
+#ifndef BOOST_NO_STD_LOCALE
+template <class charT, class traits, class BidiIterator, class Allocator>
+std::basic_ostream<charT, traits>&
+   operator << (std::basic_ostream<charT, traits>& os,
+                const match_results<BidiIterator, Allocator>& s)
+{
+   return (os << s.str());
+}
+#else
+template <class BidiIterator, class Allocator>
+std::ostream& operator << (std::ostream& os,
+                           const match_results<BidiIterator, Allocator>& s)
+{
+   return (os << s.str());
+}
+#endif
+
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+} // namespace boost
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_SUFFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+
+
diff --git a/ext/boost/regex/v4/mem_block_cache.hpp b/ext/boost/regex/v4/mem_block_cache.hpp
new file mode 100644
index 0000000000..222142dd69
--- /dev/null
+++ b/ext/boost/regex/v4/mem_block_cache.hpp
@@ -0,0 +1,99 @@
+ /*
+ * Copyright (c) 2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         mem_block_cache.hpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: memory block cache used by the non-recursive matcher.
+ */ + +#ifndef BOOST_REGEX_V4_MEM_BLOCK_CACHE_HPP +#define BOOST_REGEX_V4_MEM_BLOCK_CACHE_HPP + +#include <new> +#ifdef BOOST_HAS_THREADS +#include <boost/regex/pending/static_mutex.hpp> +#endif + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +namespace boost{ +namespace re_detail{ + +struct mem_block_node +{ + mem_block_node* next; +}; + +struct mem_block_cache +{ + // this member has to be statically initialsed: + mem_block_node* next; + unsigned cached_blocks; +#ifdef BOOST_HAS_THREADS + boost::static_mutex mut; +#endif + + ~mem_block_cache() + { + while(next) + { + mem_block_node* old = next; + next = next->next; + ::operator delete(old); + } + } + void* get() + { +#ifdef BOOST_HAS_THREADS + boost::static_mutex::scoped_lock g(mut); +#endif + if(next) + { + mem_block_node* result = next; + next = next->next; + --cached_blocks; + return result; + } + return ::operator new(BOOST_REGEX_BLOCKSIZE); + } + void put(void* p) + { +#ifdef BOOST_HAS_THREADS + boost::static_mutex::scoped_lock g(mut); +#endif + if(cached_blocks >= BOOST_REGEX_MAX_CACHE_BLOCKS) + { + ::operator delete(p); + } + else + { + mem_block_node* old = static_cast<mem_block_node*>(p); + old->next = next; + next = old; + ++cached_blocks; + } + } +}; + +extern mem_block_cache block_cache; + +} +} // namespace boost + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +#endif + diff --git a/ext/boost/regex/v4/perl_matcher.hpp b/ext/boost/regex/v4/perl_matcher.hpp new file mode 100644 index 0000000000..726c2881e5 --- /dev/null +++ b/ext/boost/regex/v4/perl_matcher.hpp @@ -0,0 +1,580 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#ifndef BOOST_REGEX_MATCHER_HPP +#define BOOST_REGEX_MATCHER_HPP + +#include <boost/regex/v4/iterator_category.hpp> + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace boost{ +namespace re_detail{ + +// +// error checking API: +// +BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf); +// +// function can_start: +// +template <class charT> +inline bool can_start(charT c, const unsigned char* map, unsigned char mask) +{ + return ((c < static_cast<charT>(0)) ? true : ((c >= static_cast<charT>(1 << CHAR_BIT)) ? true : map[c] & mask)); +} +inline bool can_start(char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) +{ + return map[c] & mask; +} +inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) +{ + return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); +} +#if !defined(__hpux) // WCHAR_MIN not usable in pp-directives. +#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) +{ + return ((c >= static_cast<wchar_t>(1u << CHAR_BIT)) ? true : map[c] & mask); +} +#endif +#endif +#if !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) +{ + return (((c >= static_cast<unsigned int>(1u << CHAR_BIT)) ? 
true : map[c] & mask)); +} +#endif + + +// +// Unfortunately Rogue Waves standard library appears to have a bug +// in std::basic_string::compare that results in eroneous answers +// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version +// 0x020101) the test case was: +// {39135,0} < {0xff,0} +// which succeeds when it should not. +// +#ifndef _RWSTD_VER +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) +template <class C, class T, class A> +inline int string_compare(const std::basic_string<C,T,A>& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +#endif +#else +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) +template <class C, class T, class A> +inline int string_compare(const std::basic_string<C,T,A>& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +#endif +inline int string_compare(const std::string& s, const char* p) +{ return std::strcmp(s.c_str(), p); } +# ifndef BOOST_NO_WREGEX +inline int string_compare(const std::wstring& s, const wchar_t* p) +{ return std::wcscmp(s.c_str(), p); } +#endif +#endif +template <class Seq, class C> +inline int string_compare(const Seq& s, const C* p) +{ + std::size_t i = 0; + while((i < s.size()) && (p[i] == s[i])) + { + ++i; + } + return (i == s.size()) ? 
-p[i] : s[i] - p[i]; +} +# define STR_COMP(s,p) string_compare(s,p) + +template<class charT> +inline const charT* re_skip_past_null(const charT* p) +{ + while (*p != static_cast<charT>(0)) ++p; + return ++p; +} + +template <class iterator, class charT, class traits_type, class char_classT> +iterator BOOST_REGEX_CALL re_is_set_member(iterator next, + iterator last, + const re_set_long<char_classT>* set_, + const regex_data<charT, traits_type>& e, bool icase) +{ + const charT* p = reinterpret_cast<const charT*>(set_+1); + iterator ptr; + unsigned int i; + //bool icase = e.m_flags & regex_constants::icase; + + if(next == last) return next; + + typedef typename traits_type::string_type traits_string_type; + const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits); + + // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never + // referenced + (void)traits_inst; + + // try and match a single character, could be a multi-character + // collating element... + for(i = 0; i < set_->csingles; ++i) + { + ptr = next; + if(*p == static_cast<charT>(0)) + { + // treat null string as special case: + if(traits_inst.translate(*ptr, icase) != *p) + { + while(*p == static_cast<charT>(0))++p; + continue; + } + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + } + else + { + while(*p && (ptr != last)) + { + if(traits_inst.translate(*ptr, icase) != *p) + break; + ++p; + ++ptr; + } + + if(*p == static_cast<charT>(0)) // if null we've matched + return set_->isnot ? next : (ptr == next) ? 
++next : ptr; + + p = re_skip_past_null(p); // skip null + } + } + + charT col = traits_inst.translate(*next, icase); + + + if(set_->cranges || set_->cequivalents) + { + traits_string_type s1; + // + // try and match a range, NB only a single character can match + if(set_->cranges) + { + if((e.m_flags & regex_constants::collate) == 0) + s1.assign(1, col); + else + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform(a, a + 1); + } + for(i = 0; i < set_->cranges; ++i) + { + if(STR_COMP(s1, p) >= 0) + { + do{ ++p; }while(*p); + ++p; + if(STR_COMP(s1, p) <= 0) + return set_->isnot ? next : ++next; + } + else + { + // skip first string + do{ ++p; }while(*p); + ++p; + } + // skip second string + do{ ++p; }while(*p); + ++p; + } + } + // + // try and match an equivalence class, NB only a single character can match + if(set_->cequivalents) + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform_primary(a, a +1); + for(i = 0; i < set_->cequivalents; ++i) + { + if(STR_COMP(s1, p) == 0) + return set_->isnot ? next : ++next; + // skip string + do{ ++p; }while(*p); + ++p; + } + } + } + if(traits_inst.isctype(col, set_->cclasses) == true) + return set_->isnot ? next : ++next; + if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) + return set_->isnot ? next : ++next; + return set_->isnot ? 
++next : next; +} + +template <class BidiIterator> +class repeater_count +{ + repeater_count** stack; + repeater_count* next; + int state_id; + std::size_t count; // the number of iterations so far + BidiIterator start_pos; // where the last repeat started +public: + repeater_count(repeater_count** s) + { + stack = s; + next = 0; + state_id = -1; + count = 0; + } + repeater_count(int i, repeater_count** s, BidiIterator start) + : start_pos(start) + { + state_id = i; + stack = s; + next = *stack; + *stack = this; + if(state_id > next->state_id) + count = 0; + else + { + repeater_count* p = next; + while(p->state_id != state_id) + p = p->next; + count = p->count; + start_pos = p->start_pos; + } + } + ~repeater_count() + { + if(next) + *stack = next; + } + std::size_t get_count() { return count; } + int get_id() { return state_id; } + std::size_t operator++() { return ++count; } + bool check_null_repeat(const BidiIterator& pos, std::size_t max) + { + // this is called when we are about to start a new repeat, + // if the last one was NULL move our count to max, + // otherwise save the current position. + bool result = (count == 0) ? 
false : (pos == start_pos); + if(result) + count = max; + else + start_pos = pos; + return result; + } +}; + +struct saved_state; + +enum saved_state_type +{ + saved_type_end = 0, + saved_type_paren = 1, + saved_type_recurse = 2, + saved_type_assertion = 3, + saved_state_alt = 4, + saved_state_repeater_count = 5, + saved_state_extra_block = 6, + saved_state_greedy_single_repeat = 7, + saved_state_rep_slow_dot = 8, + saved_state_rep_fast_dot = 9, + saved_state_rep_char = 10, + saved_state_rep_short_set = 11, + saved_state_rep_long_set = 12, + saved_state_non_greedy_long_repeat = 13, + saved_state_count = 14 +}; + +template <class Results> +struct recursion_info +{ + typedef typename Results::value_type value_type; + typedef typename value_type::iterator iterator; + int id; + const re_syntax_base* preturn_address; + Results results; + repeater_count<iterator>* repeater_stack; +}; + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4231 4660) +#endif + +template <class BidiIterator, class Allocator, class traits> +class perl_matcher +{ +public: + typedef typename traits::char_type char_type; + typedef perl_matcher<BidiIterator, Allocator, traits> self_type; + typedef bool (self_type::*matcher_proc_type)(void); + typedef std::size_t traits_size_type; + typedef typename is_byte<char_type>::width_type width_type; + typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type; + typedef match_results<BidiIterator, Allocator> results_type; + + perl_matcher(BidiIterator first, BidiIterator end, + match_results<BidiIterator, Allocator>& what, + const basic_regex<char_type, traits>& e, + match_flag_type f, + BidiIterator l_base) + : m_result(what), base(first), last(end), + position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), + m_independent(false), next_count(&rep_obj), rep_obj(&next_count), recursion_stack_position(0) + { + construct_init(e, f); + } + + bool match(); + bool find(); + + void 
setf(match_flag_type f) + { m_match_flags |= f; } + void unsetf(match_flag_type f) + { m_match_flags &= ~f; } + +private: + void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f); + + bool find_imp(); + bool match_imp(); +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + typedef bool (perl_matcher::*protected_proc_type)(); + bool protected_call(protected_proc_type); +#endif + void estimate_max_state_count(std::random_access_iterator_tag*); + void estimate_max_state_count(void*); + bool match_prefix(); + bool match_all_states(); + + // match procs, stored in s_match_vtable: + bool match_startmark(); + bool match_endmark(); + bool match_literal(); + bool match_start_line(); + bool match_end_line(); + bool match_wild(); + bool match_match(); + bool match_word_boundary(); + bool match_within_word(); + bool match_word_start(); + bool match_word_end(); + bool match_buffer_start(); + bool match_buffer_end(); + bool match_backref(); + bool match_long_set(); + bool match_set(); + bool match_jump(); + bool match_alt(); + bool match_rep(); + bool match_combining(); + bool match_soft_buffer_end(); + bool match_restart_continue(); + bool match_long_set_repeat(); + bool match_set_repeat(); + bool match_char_repeat(); + bool match_dot_repeat_fast(); + bool match_dot_repeat_slow(); + bool match_dot_repeat_dispatch() + { + return ::boost::is_random_access_iterator<BidiIterator>::value ? 
match_dot_repeat_fast() : match_dot_repeat_slow(); + } + bool match_backstep(); + bool match_assert_backref(); + bool match_toggle_case(); +#ifdef BOOST_REGEX_RECURSIVE + bool backtrack_till_match(std::size_t count); +#endif + bool match_recursion(); + + // find procs stored in s_find_vtable: + bool find_restart_any(); + bool find_restart_word(); + bool find_restart_line(); + bool find_restart_buf(); + bool find_restart_lit(); + +private: + // final result structure to be filled in: + match_results<BidiIterator, Allocator>& m_result; + // temporary result for POSIX matches: + scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match; + // pointer to actual result structure to fill in: + match_results<BidiIterator, Allocator>* m_presult; + // start of sequence being searched: + BidiIterator base; + // end of sequence being searched: + BidiIterator last; + // current character being examined: + BidiIterator position; + // where to restart next search after failed match attempt: + BidiIterator restart; + // where the current search started from, acts as base for $` during grep: + BidiIterator search_base; + // how far we can go back when matching lookbehind: + BidiIterator backstop; + // the expression being examined: + const basic_regex<char_type, traits>& re; + // the expression's traits class: + const ::boost::regex_traits_wrapper<traits>& traits_inst; + // the next state in the machine being matched: + const re_syntax_base* pstate; + // matching flags in use: + match_flag_type m_match_flags; + // how many states we have examined so far: + boost::uintmax_t state_count; + // max number of states to examine before giving up: + boost::uintmax_t max_state_count; + // whether we should ignore case or not: + bool icase; + // set to true when (position == last), indicates that we may have a partial match: + bool m_has_partial_match; + // set to true whenever we get a match: + bool m_has_found_match; + // set to true whenever we're inside an independent 
sub-expression: + bool m_independent; + // the current repeat being examined: + repeater_count<BidiIterator>* next_count; + // the first repeat being examined (top of linked list): + repeater_count<BidiIterator> rep_obj; + // the mask to pass when matching word boundaries: + typename traits::char_class_type m_word_mask; + // the bitmask to use when determining whether a match_any matches a newline or not: + unsigned char match_any_mask; + // recursion information: + recursion_info<results_type> recursion_stack[50]; + unsigned recursion_stack_position; + +#ifdef BOOST_REGEX_NON_RECURSIVE + // + // additional members for non-recursive version: + // + typedef bool (self_type::*unwind_proc_type)(bool); + + void extend_stack(); + bool unwind(bool); + bool unwind_end(bool); + bool unwind_paren(bool); + bool unwind_recursion_stopper(bool); + bool unwind_assertion(bool); + bool unwind_alt(bool); + bool unwind_repeater_counter(bool); + bool unwind_extra_block(bool); + bool unwind_greedy_single_repeat(bool); + bool unwind_slow_dot_repeat(bool); + bool unwind_fast_dot_repeat(bool); + bool unwind_char_repeat(bool); + bool unwind_short_set_repeat(bool); + bool unwind_long_set_repeat(bool); + bool unwind_non_greedy_repeat(bool); + bool unwind_recursion(bool); + bool unwind_recursion_pop(bool); + void destroy_single_repeat(); + void push_matched_paren(int index, const sub_match<BidiIterator>& sub); + void push_recursion_stopper(); + void push_assertion(const re_syntax_base* ps, bool positive); + void push_alt(const re_syntax_base* ps); + void push_repeater_count(int i, repeater_count<BidiIterator>** s); + void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); + void push_non_greedy_repeat(const re_syntax_base* ps); + void push_recursion(int id, const re_syntax_base* p, results_type* presults); + void push_recursion_pop(); + + // pointer to base of stack: + saved_state* m_stack_base; + // pointer to current stack position: + 
saved_state* m_backup_state; + // determines what value to return when unwinding from recursion, + // allows for mixed recursive/non-recursive algorithm: + bool m_recursive_result; + // how many memory blocks have we used up?: + unsigned used_block_count; +#endif + + // these operations aren't allowed, so are declared private, + // bodies are provided to keep explicit-instantiation requests happy: + perl_matcher& operator=(const perl_matcher&) + { + return *this; + } + perl_matcher(const perl_matcher& that) + : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} +}; + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +// +// include the implementation of perl_matcher: +// +#ifdef BOOST_REGEX_RECURSIVE +#include <boost/regex/v4/perl_matcher_recursive.hpp> +#else +#include <boost/regex/v4/perl_matcher_non_recursive.hpp> +#endif +// this one has to be last: +#include <boost/regex/v4/perl_matcher_common.hpp> + +#endif + diff --git a/ext/boost/regex/v4/perl_matcher_common.hpp b/ext/boost/regex/v4/perl_matcher_common.hpp new file mode 100644 index 0000000000..fd439f84b6 --- /dev/null +++ b/ext/boost/regex/v4/perl_matcher_common.hpp @@ -0,0 +1,949 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. 
+ * FILE perl_matcher_common.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Definitions of perl_matcher member functions that are + * common to both the recursive and non-recursive versions. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef __BORLANDC__ +# pragma option push -w-8008 -w-8066 +#endif +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace boost{ +namespace re_detail{ + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f) +{ + typedef typename regex_iterator_traits<BidiIterator>::iterator_category category; + typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type; + + if(e.empty()) + { + // precondition failure: e is not a valid regex. 
+ std::invalid_argument ex("Invalid regular expression object"); + boost::throw_exception(ex); + } + pstate = 0; + m_match_flags = f; + estimate_max_state_count(static_cast<category*>(0)); + expression_flag_type re_f = re.flags(); + icase = re_f & regex_constants::icase; + if(!(m_match_flags & (match_perl|match_posix))) + { + if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) + m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + m_match_flags |= match_perl; + else + m_match_flags |= match_posix; + } + if(m_match_flags & match_posix) + { + m_temp_match.reset(new match_results<BidiIterator, Allocator>()); + m_presult = m_temp_match.get(); + } + else + m_presult = &m_result; +#ifdef BOOST_REGEX_NON_RECURSIVE + m_stack_base = 0; + m_backup_state = 0; +#endif + // find the value to use for matching word boundaries: + m_word_mask = re.get_data().m_word_mask; + // find bitmask to use for matching '.': + match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline); +} + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*) +{ + // + // How many states should we allow our machine to visit before giving up? + // This is a heuristic: it takes the greater of O(N^2) and O(NS^2) + // where N is the length of the string, and S is the number of states + // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2) + // but these take unreasonably amounts of time to bale out in pathological + // cases. 
+ // + // Calculate NS^2 first: + // + static const boost::uintmax_t k = 100000; + boost::uintmax_t dist = boost::re_detail::distance(base, last); + if(dist == 0) + dist = 1; + boost::uintmax_t states = re.size(); + if(states == 0) + states = 1; + states *= states; + if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states) + { + max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2; + return; + } + states *= dist; + if((std::numeric_limits<boost::uintmax_t>::max)() - k < states) + { + max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2; + return; + } + states += k; + + max_state_count = states; + + // + // Now calculate N^2: + // + states = dist; + if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states) + { + max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2; + return; + } + states *= dist; + if((std::numeric_limits<boost::uintmax_t>::max)() - k < states) + { + max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2; + return; + } + states += k; + // + // N^2 can be a very large number indeed, to prevent things getting out + // of control, cap the max states: + // + if(states > BOOST_REGEX_MAX_STATE_COUNT) + states = BOOST_REGEX_MAX_STATE_COUNT; + // + // If (the possibly capped) N^2 is larger than our first estimate, + // use this instead: + // + if(states > max_state_count) + max_state_count = states; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*) +{ + // we don't know how long the sequence is: + max_state_count = BOOST_REGEX_MAX_STATE_COUNT; +} + +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +template <class BidiIterator, class Allocator, class traits> +inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call( + protected_proc_type proc) +{ + ::boost::re_detail::concrete_protected_call + <perl_matcher<BidiIterator, Allocator, traits> > + obj(this, proc); + 
return obj.execute(); + +} +#endif + +template <class BidiIterator, class Allocator, class traits> +inline bool perl_matcher<BidiIterator, Allocator, traits>::match() +{ +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp); +#else + return match_imp(); +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_imp() +{ + // initialise our stack if we are non-recursive: +#ifdef BOOST_REGEX_NON_RECURSIVE + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif +#endif + + // reset our state machine: + position = base; + search_base = base; + state_count = 0; + m_match_flags |= regex_constants::match_all; + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); + m_presult->set_base(base); + m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs())); + if(m_match_flags & match_posix) + m_result = *m_presult; + verify_options(re.flags(), m_match_flags); + if(0 == match_prefix()) + return false; + return (m_result[0].second == last) && (m_result[0].first == base); + +#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. 
+ while(unwind(true)){} + throw; + } +#endif +} + +template <class BidiIterator, class Allocator, class traits> +inline bool perl_matcher<BidiIterator, Allocator, traits>::find() +{ +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp); +#else + return find_imp(); +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::find_imp() +{ + static matcher_proc_type const s_find_vtable[7] = + { + &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any, + &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word, + &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line, + &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf, + &perl_matcher<BidiIterator, Allocator, traits>::match_prefix, + &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit, + &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit, + }; + + // initialise our stack if we are non-recursive: +#ifdef BOOST_REGEX_NON_RECURSIVE + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif +#endif + + state_count = 0; + if((m_match_flags & regex_constants::match_init) == 0) + { + // reset our state machine: + search_base = position = base; + pstate = re.get_first_state(); + m_presult->set_size((m_match_flags & match_nosubs) ? 
1 : re.mark_count(), base, last); + m_presult->set_base(base); + m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs())); + m_match_flags |= regex_constants::match_init; + } + else + { + // start again: + search_base = position = m_result[0].second; + // If last match was null and match_not_null was not set then increment + // our start position, otherwise we go into an infinite loop: + if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0)) + { + if(position == last) + return false; + else + ++position; + } + // reset $` start: + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); + //if((base != search_base) && (base == backstop)) + // m_match_flags |= match_prev_avail; + } + if(m_match_flags & match_posix) + { + m_result.set_size(re.mark_count(), base, last); + m_result.set_base(base); + } + + verify_options(re.flags(), m_match_flags); + // find out what kind of expression we have: + unsigned type = (m_match_flags & match_continuous) ? + static_cast<unsigned int>(regbase::restart_continue) + : static_cast<unsigned int>(re.get_restart_type()); + + // call the appropriate search routine: + matcher_proc_type proc = s_find_vtable[type]; + return (this->*proc)(); + +#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. 
+ while(unwind(true)){} + throw; + } +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix() +{ + m_has_partial_match = false; + m_has_found_match = false; + pstate = re.get_first_state(); + m_presult->set_first(position); + restart = position; + match_all_states(); + if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial)) + { + m_has_found_match = true; + m_presult->set_second(last, 0, false); + position = last; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(m_has_found_match && (match_extra & m_match_flags)) + { + // + // we have a match, reverse the capture information: + // + for(unsigned i = 0; i < m_presult->size(); ++i) + { + typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures(); + std::reverse(seq.begin(), seq.end()); + } + } +#endif + if(!m_has_found_match) + position = restart; // reset search postion + return m_has_found_match; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_literal() +{ + unsigned int len = static_cast<const re_literal*>(pstate)->length; + const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1); + // + // compare string with what we stored in + // our records: + for(unsigned int i = 0; i < len; ++i, ++position) + { + if((position == last) || (traits_inst.translate(*position, icase) != what[i])) + return false; + } + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line() +{ + if(position == backstop) + { + if((m_match_flags & match_prev_avail) == 0) + { + if((m_match_flags & match_not_bol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; + } + } + else if(m_match_flags & match_single_line) + return false; + + // check the 
previous value character: + BidiIterator t(position); + --t; + if(position != last) + { + if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) ) + { + pstate = pstate->next.p; + return true; + } + } + else if(is_separator(*t)) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line() +{ + if(position != last) + { + if(m_match_flags & match_single_line) + return false; + // we're not yet at the end so *first is always valid: + if(is_separator(*position)) + { + if((position != backstop) || (m_match_flags & match_prev_avail)) + { + // check that we're not in the middle of \r\n sequence + BidiIterator t(position); + --t; + if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) + { + return false; + } + } + pstate = pstate->next.p; + return true; + } + } + else if((m_match_flags & match_not_eol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_wild() +{ + if(position == last) + return false; + if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0)) + return false; + if((*position == char_type(0)) && (m_match_flags & match_not_dot_null)) + return false; + pstate = pstate->next.p; + ++position; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary() +{ + bool b; // indcates whether next character is a word character + if(position != last) + { + // prev and this character must be opposites: + #if defined(BOOST_REGEX_USE_C_LOCALE) && defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95) + b = traits::isctype(*position, m_word_mask); + #else + b = 
traits_inst.isctype(*position, m_word_mask); + #endif + } + else + { + b = (m_match_flags & match_not_eow) ? true : false; + } + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + b ^= true; + else + b ^= false; + } + else + { + --position; + b ^= traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b) + { + pstate = pstate->next.p; + return true; + } + return false; // no match if we get to here... +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word() +{ + if(position == last) + return false; + // both prev and this character must be m_word_mask: + bool prev = traits_inst.isctype(*position, m_word_mask); + { + bool b; + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; + else + { + --position; + b = traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b == prev) + { + pstate = pstate->next.p; + return true; + } + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start() +{ + if(position == last) + return false; // can't be starting a word if we're already at the end of input + if(!traits_inst.isctype(*position, m_word_mask)) + return false; // next character isn't a word character + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + return false; // no previous input + } + else + { + // otherwise inside buffer: + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask)) + return false; // previous character not non-word + } + // OK we have a match: + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end() +{ + if((position == backstop) && ((m_match_flags & 
match_prev_avail) == 0)) + return false; // start of buffer can't be end of word + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask) == false) + return false; // previous character wasn't a word character + + if(position == last) + { + if(m_match_flags & match_not_eow) + return false; // end of buffer but not end of word + } + else + { + // otherwise inside buffer: + if(traits_inst.isctype(*position, m_word_mask)) + return false; // next character is a word character + } + pstate = pstate->next.p; + return true; // if we fall through to here then we've succeeded +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start() +{ + if((position != backstop) || (m_match_flags & match_not_bob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end() +{ + if((position != last) || (m_match_flags & match_not_eob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_backref() +{ + // + // Compare with what we previously matched. + // Note that this succeeds if the backref did not partisipate + // in the match, this is in line with ECMAScript, but not Perl + // or PCRE. 
+ // + BidiIterator i = (*m_presult)[static_cast<const re_brace*>(pstate)->index].first; + BidiIterator j = (*m_presult)[static_cast<const re_brace*>(pstate)->index].second; + while(i != j) + { + if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase))) + return false; + ++i; + ++position; + } + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set() +{ + typedef typename traits::char_class_type char_class_type; + // let the traits class do the work: + if(position == last) + return false; + BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase); + if(t != position) + { + pstate = pstate->next.p; + position = t; + return true; + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_set() +{ + if(position == last) + return false; + if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + pstate = pstate->next.p; + ++position; + return true; + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_jump() +{ + pstate = static_cast<const re_jump*>(pstate)->alt.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_combining() +{ + if(position == last) + return false; + if(is_combining(traits_inst.translate(*position, icase))) + return false; + ++position; + while((position != last) && is_combining(traits_inst.translate(*position, icase))) + ++position; + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, 
traits>::match_soft_buffer_end() +{ + if(m_match_flags & match_not_eob) + return false; + BidiIterator p(position); + while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p; + if(p != last) + return false; + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue() +{ + if(position == search_base) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if( ::boost::is_random_access_iterator<BidiIterator>::value) + { + std::ptrdiff_t maxlen = ::boost::re_detail::distance(backstop, position); + if(maxlen < static_cast<const re_brace*>(pstate)->index) + return false; + std::advance(position, -static_cast<const re_brace*>(pstate)->index); + } + else + { + int c = static_cast<const re_brace*>(pstate)->index; + while(c--) + { + if(position == backstop) + return false; + --position; + } + } + pstate = pstate->next.p; + return true; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref() +{ + // return true if marked sub-expression N has been matched: + int index = static_cast<const re_brace*>(pstate)->index; + bool result; + if(index == 9999) + { + // Magic value for a (DEFINE) block: + return false; + } + else if(index > 0) + { + // Check if index is a hash value: + if(index >= 10000) + index = re.get_data().get_id(index); + // Have we matched subexpression "index"? + result = (*m_presult)[index].matched; + pstate = pstate->next.p; + } + else + { + // Have we recursed into subexpression "index"? 
+ // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. + int id = -index-1; + if(id >= 10000) + id = re.get_data().get_id(id); + result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == id) || (index == 0)); + pstate = pstate->next.p; + } + return result; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case() +{ + // change our case sensitivity: + this->icase = static_cast<const re_case*>(pstate)->icase; + pstate = pstate->next.p; + return true; +} + + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + const unsigned char* _map = re.get_map(); + while(true) + { + // skip everything we can't match: + while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) ) + ++position; + if(position == last) + { + // run out of characters, try a null match if possible: + if(re.can_be_null()) + return match_prefix(); + break; + } + // now try and obtain a match: + if(match_prefix()) + return true; + if(position == last) + return false; + ++position; + } + return false; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // do search optimised for word starts: + const unsigned char* _map = re.get_map(); + if((m_match_flags & match_prev_avail) || (position != base)) + --position; + else if(match_prefix()) + return true; + do + { + while((position != last) && traits_inst.isctype(*position, m_word_mask)) + ++position; + while((position != last) && !traits_inst.isctype(*position, m_word_mask)) + ++position; + if(position == 
last) + break; + + if(can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + break; + } while(true); + return false; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line() +{ + // do search optimised for line starts: + const unsigned char* _map = re.get_map(); + if(match_prefix()) + return true; + while(position != last) + { + while((position != last) && !is_separator(*position)) + ++position; + if(position == last) + return false; + ++position; + if(position == last) + { + if(re.can_be_null() && match_prefix()) + return true; + return false; + } + + if( can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + return false; + //++position; + } + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf() +{ + if((position == base) && ((m_match_flags & match_not_bob) == 0)) + return match_prefix(); + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit() +{ +#if 0 + if(position == last) + return false; // can't possibly match if we're at the end already + + unsigned type = (m_match_flags & match_continuous) ? 
+ static_cast<unsigned int>(regbase::restart_continue) + : static_cast<unsigned int>(re.get_restart_type()); + + const kmp_info<char_type>* info = access::get_kmp(re); + int len = info->len; + const char_type* x = info->pstr; + int j = 0; + while (position != last) + { + while((j > -1) && (x[j] != traits_inst.translate(*position, icase))) + j = info->kmp_next[j]; + ++position; + ++j; + if(j >= len) + { + if(type == regbase::restart_fixed_lit) + { + std::advance(position, -j); + restart = position; + std::advance(restart, len); + m_result.set_first(position); + m_result.set_second(restart); + position = restart; + return true; + } + else + { + restart = position; + std::advance(position, -j); + if(match_prefix()) + return true; + else + { + for(int k = 0; (restart != position) && (k < j); ++k, --restart) + {} // dwa 10/20/2000 - warning suppression for MWCW + if(restart != last) + ++restart; + position = restart; + j = 0; //we could do better than this... + } + } + } + } + if((m_match_flags & match_partial) && (position == last) && j) + { + // we need to check for a partial match: + restart = position; + std::advance(position, -j); + return match_prefix(); + } +#endif + return false; +} + +} // namespace re_detail + +} // namespace boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef __BORLANDC__ +# pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/ext/boost/regex/v4/perl_matcher_non_recursive.hpp b/ext/boost/regex/v4/perl_matcher_non_recursive.hpp new file mode 100644 index 0000000000..2ce7ebe034 --- /dev/null +++ b/ext/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -0,0 +1,1635 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the non-recursive implementation. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP + +#include <new> + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace boost{ +namespace re_detail{ + +template <class T> +inline void inplace_destroy(T* p) +{ + (void)p; // warning suppression + p->~T(); +} + +struct saved_state +{ + union{ + unsigned int state_id; + // this padding ensures correct alignment on 64-bit platforms: + std::size_t padding1; + std::ptrdiff_t padding2; + void* padding3; + }; + saved_state(unsigned i) : state_id(i) {} +}; + +template <class BidiIterator> +struct saved_matched_paren : public saved_state +{ + int index; + sub_match<BidiIterator> sub; + saved_matched_paren(int i, const sub_match<BidiIterator>& s) : saved_state(1), index(i), sub(s){}; +}; + +template <class BidiIterator> +struct saved_position : public saved_state +{ + const re_syntax_base* pstate; + BidiIterator position; + saved_position(const re_syntax_base* ps, BidiIterator pos, int i) : saved_state(i), pstate(ps), position(pos){}; +}; + +template <class BidiIterator> +struct saved_assertion : public saved_position<BidiIterator> +{ + bool positive; + saved_assertion(bool p, const re_syntax_base* ps, BidiIterator pos) + : saved_position<BidiIterator>(ps, pos, saved_type_assertion), positive(p){}; +}; + +template <class BidiIterator> +struct saved_repeater : public 
saved_state +{ + repeater_count<BidiIterator> count; + saved_repeater(int i, repeater_count<BidiIterator>** s, BidiIterator start) + : saved_state(saved_state_repeater_count), count(i,s,start){} +}; + +struct saved_extra_block : public saved_state +{ + saved_state *base, *end; + saved_extra_block(saved_state* b, saved_state* e) + : saved_state(saved_state_extra_block), base(b), end(e) {} +}; + +struct save_state_init +{ + saved_state** stack; + save_state_init(saved_state** base, saved_state** end) + : stack(base) + { + *base = static_cast<saved_state*>(get_mem_block()); + *end = reinterpret_cast<saved_state*>(reinterpret_cast<char*>(*base)+BOOST_REGEX_BLOCKSIZE); + --(*end); + (void) new (*end)saved_state(0); + BOOST_ASSERT(*end > *base); + } + ~save_state_init() + { + put_mem_block(*stack); + *stack = 0; + } +}; + +template <class BidiIterator> +struct saved_single_repeat : public saved_state +{ + std::size_t count; + const re_repeat* rep; + BidiIterator last_position; + saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id) + : saved_state(arg_id), count(c), rep(r), last_position(lp){} +}; + +template <class Results> +struct saved_recursion : public saved_state +{ + saved_recursion(int id, const re_syntax_base* p, Results* pr) + : saved_state(14), recursion_id(id), preturn_address(p), results(*pr) + {} + int recursion_id; + const re_syntax_base* preturn_address; + Results results; +}; + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() +{ + static matcher_proc_type const s_match_vtable[30] = + { + (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), + &perl_matcher<BidiIterator, Allocator, traits>::match_endmark, + &perl_matcher<BidiIterator, Allocator, traits>::match_literal, + &perl_matcher<BidiIterator, Allocator, traits>::match_start_line, + &perl_matcher<BidiIterator, Allocator, traits>::match_end_line, + &perl_matcher<BidiIterator, 
Allocator, traits>::match_wild, + &perl_matcher<BidiIterator, Allocator, traits>::match_match, + &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary, + &perl_matcher<BidiIterator, Allocator, traits>::match_within_word, + &perl_matcher<BidiIterator, Allocator, traits>::match_word_start, + &perl_matcher<BidiIterator, Allocator, traits>::match_word_end, + &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start, + &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end, + &perl_matcher<BidiIterator, Allocator, traits>::match_backref, + &perl_matcher<BidiIterator, Allocator, traits>::match_long_set, + &perl_matcher<BidiIterator, Allocator, traits>::match_set, + &perl_matcher<BidiIterator, Allocator, traits>::match_jump, + &perl_matcher<BidiIterator, Allocator, traits>::match_alt, + &perl_matcher<BidiIterator, Allocator, traits>::match_rep, + &perl_matcher<BidiIterator, Allocator, traits>::match_combining, + &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end, + &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::boost::is_random_access_iterator<BidiIterator>::value ? 
&perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), + &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch, + &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::match_backstep, + &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, + &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, + &perl_matcher<BidiIterator, Allocator, traits>::match_recursion, + }; + + push_recursion_stopper(); + do{ + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_space); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + bool successful_unwind = unwind(false); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(false == successful_unwind) + return m_recursive_result; + } + } + }while(unwind(true)); + return m_recursive_result; +} + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::extend_stack() +{ + if(used_block_count) + { + --used_block_count; + saved_state* stack_base; + saved_state* backup_state; + stack_base = static_cast<saved_state*>(get_mem_block()); + backup_state = reinterpret_cast<saved_state*>(reinterpret_cast<char*>(stack_base)+BOOST_REGEX_BLOCKSIZE); + saved_extra_block* block = static_cast<saved_extra_block*>(backup_state); + --block; + (void) new (block) saved_extra_block(m_stack_base, m_backup_state); + m_stack_base = stack_base; + m_backup_state = block; + } + else + 
raise_error(traits_inst, regex_constants::error_size); +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_matched_paren(int index, const sub_match<BidiIterator>& sub) +{ + //BOOST_ASSERT(index); + saved_matched_paren<BidiIterator>* pmp = static_cast<saved_matched_paren<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_matched_paren<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_matched_paren<BidiIterator>(index, sub); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_stopper() +{ + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(saved_type_recurse); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_assertion(const re_syntax_base* ps, bool positive) +{ + saved_assertion<BidiIterator>* pmp = static_cast<saved_assertion<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_assertion<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_assertion<BidiIterator>(positive, ps, position); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_alt(const re_syntax_base* ps) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position<BidiIterator>(ps, position, saved_state_alt); + m_backup_state = pmp; 
+} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_non_greedy_repeat(const re_syntax_base* ps) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position<BidiIterator>(ps, position, saved_state_non_greedy_long_repeat); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_repeater_count(int i, repeater_count<BidiIterator>** s) +{ + saved_repeater<BidiIterator>* pmp = static_cast<saved_repeater<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_repeater<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_repeater<BidiIterator>(i, s, position); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id) +{ + saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_single_repeat<BidiIterator>(c, r, last_position, state_id); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_recursion(int id, const re_syntax_base* p, results_type* presults) +{ + saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = 
static_cast<saved_recursion<results_type>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_recursion<results_type>(id, p, presults); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() +{ + int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert: + const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + push_assertion(next_pstate, index == -1); + break; + } + case -3: + { + // independent sub-expression, currently this is always recursive: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + match_results<BidiIterator, Allocator> temp_match(*m_presult); + unsigned i; + for(i = 0; i < temp_match.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: + r = match_all_states(); + // now place the stored captures back: + for(i = 0; i < temp_match.size(); ++i) + { + typedef typename sub_match<BidiIterator>::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = temp_match[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + return r; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast<const re_alt*>(pstate->next.p); + 
BOOST_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast<const re_brace*>(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + position = saved_position; + if(negated) + r = !r; + if(r) + pstate = next_pstate; + else + pstate = alt->alt.p; + break; + } + } + case -5: + { + push_matched_paren(0, (*m_presult)[0]); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + break; + } + default: + { + BOOST_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + push_matched_paren(index, (*m_presult)[index]); + m_presult->set_first(position, index); + } + pstate = pstate->next.p; + break; + } + } + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast<const re_alt*>(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + push_alt(jmp->alt.p); + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible 
+} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_rep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if((m_backup_state->state_id != saved_state_repeater_count) + || (static_cast<saved_repeater<BidiIterator>*>(m_backup_state)->count.get_id() != rep->state_id) + || (next_count->get_id() != rep->state_id)) + { + // we're moving to a different repeat from the last + // one, so set up a counter object: + push_repeater_count(rep->state_id, &next_count); + } + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + return false; + } + + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + if(take_second) + { + // store position in case we fail: + push_alt(rep->alt.p); + } + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + else if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't 
take anything, fail... + } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + push_non_greedy_repeat(rep->next.p); + } + pstate = rep->alt.p; + return true; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + } + return false; +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow() +{ + unsigned count = 0; + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsary repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // repeat for as long as we can: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_slow_dot); + pstate = rep->alt.p; + return (position == last) ? 
(rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast() +{ + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + unsigned count = static_cast<unsigned>((std::min)(static_cast<unsigned>(::boost::re_detail::distance(position, last)), static_cast<unsigned>(greedy ? rep->max : rep->min))); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_fast_dot); + pstate = rep->alt.p; + return (position == last) ? 
(rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length); + const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator<BidiIterator>::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + } + count = (unsigned)::boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_char); + pstate = rep->alt.p; + return (position == last) ? 
(rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map; + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator<BidiIterator>::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)::boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_short_set); + pstate = rep->alt.p; + return (position == 
last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type mask_type; + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + const re_set_long<mask_type>* set = static_cast<const re_set_long<mask_type>*>(pstate->next.p); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator<BidiIterator>::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)::boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, 
rep, position, saved_state_rep_long_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Backup call stack: + // + push_recursion_pop(); + // + // Set new call stack: + // + if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0]))) + { + return false; + } + recursion_stack[recursion_stack_position].preturn_address = pstate->next.p; + recursion_stack[recursion_stack_position].results = *m_presult; + pstate = static_cast<const re_jump*>(pstate)->alt.p; + recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index; + ++recursion_stack_position; + //BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id); + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark() +{ + int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(recursion_stack_position) + { + if(index == recursion_stack[recursion_stack_position-1].id) + { + --recursion_stack_position; + pstate = recursion_stack[recursion_stack_position].preturn_address; + *m_presult = recursion_stack[recursion_stack_position].results; + push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results); + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate 
= 0; + return true; + } + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_match() +{ + if(recursion_stack_position) + { + BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id); + --recursion_stack_position; + pstate = recursion_stack[recursion_stack_position].preturn_address; + *m_presult = recursion_stack[recursion_stack_position].results; + push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results); + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + +/**************************************************************************** + +Unwind and associated proceedures follow, these perform what normal stack +unwinding does in the recursive implementation. 
+ +****************************************************************************/ + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match) +{ + static unwind_proc_type const s_unwind_table[18] = + { + &perl_matcher<BidiIterator, Allocator, traits>::unwind_end, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_paren, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_stopper, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_assertion, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_alt, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_repeater_counter, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_extra_block, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_fast_dot_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop, + }; + + m_recursive_result = have_match; + unwind_proc_type unwinder; + bool cont; + // + // keep unwinding our stack until we have something to do: + // + do + { + unwinder = s_unwind_table[m_backup_state->state_id]; + cont = (this->*unwinder)(m_recursive_result); + }while(cont); + // + // return true if we have more states to try: + // + return pstate ? 
true : false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_end(bool) +{ + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_paren(bool have_match) +{ + saved_matched_paren<BidiIterator>* pmp = static_cast<saved_matched_paren<BidiIterator>*>(m_backup_state); + // restore previous values if no match was found: + if(have_match == false) + { + m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); + m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(pmp->sub.matched && (match_extra & m_match_flags)) + ((*m_presult)[pmp->index]).get_captures().push_back(pmp->sub); +#endif + // unwind stack: + m_backup_state = pmp+1; + boost::re_detail::inplace_destroy(pmp); + return true; // keep looking +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_stopper(bool) +{ + boost::re_detail::inplace_destroy(m_backup_state++); + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_assertion(bool r) +{ + saved_assertion<BidiIterator>* pmp = static_cast<saved_assertion<BidiIterator>*>(m_backup_state); + pstate = pmp->pstate; + position = pmp->position; + bool result = (r == pmp->positive); + m_recursive_result = pmp->positive ? r : !r; + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return !result; // return false if the assertion was matched to stop search. 
//
// unwind_greedy_single_repeat:
// Backtracks into a greedy single-item repeat whose saved state records
// the repeat, how many items it consumed and where it stopped.  Gives back
// consumed characters one at a time until what follows the repeat can
// start at the current character, or until only the compulsory rep->min
// repeats are left.
// Returns true to make unwind() carry on with the next saved state, false
// once pstate/position have been set up to resume matching.
//
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(bool r)
{
   saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state);

   // if we have a match, just discard this state:
   if(r)
   {
      destroy_single_repeat();
      return true;
   }

   const re_repeat* rep = pmp->rep;
   std::size_t count = pmp->count;
   BOOST_ASSERT(rep->next.p != 0);
   BOOST_ASSERT(rep->alt.p != 0);

   // from here on count is the number of *optional* repeats that can still
   // be given back (the saved count includes the compulsory rep->min):
   count -= rep->min;

   // note that this is tested on the position at which matching failed,
   // before position is rewound below:
   if((m_match_flags & match_partial) && (position == last))
      m_has_partial_match = true;

   BOOST_ASSERT(count);
   position = pmp->last_position;

   // backtrack till we can skip out:
   do
   {
      --position;
      --count;
      ++state_count;
   }while(count && !can_start(*position, rep->_map, mask_skip));

   // if we've hit base, destroy this state:
   if(count == 0)
   {
      destroy_single_repeat();
      // nothing more to give back and still no way out: keep unwinding.
      if(!can_start(*position, rep->_map, mask_skip))
         return true;
   }
   else
   {
      // leave the state on the stack, updated, so that we can come back
      // and give up more characters later:
      pmp->count = count + rep->min;
      pmp->last_position = position;
   }
   // resume matching with whatever follows the repeat:
   pstate = rep->alt.p;
   return false;
}
if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_fast_dot_repeat(bool r) +{ + saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + + BOOST_ASSERT(count < rep->max); + position = pmp->last_position; + if(position != last) + { + + // wind forward until we can skip out of the repeat: + do + { + ++position; + ++count; + ++state_count; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r) +{ + saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const 
//
// unwind_short_set_repeat:
// Backtracks into a non-greedy repeat of a short character set (the state
// pushed with saved_state_rep_short_set by match_set_repeat()).  Extends
// the repeat one character at a time until whatever follows the repeat can
// start, the repeat limit is reached, the input runs out, or a character
// is not in the set.
// Returns true to make unwind() carry on with the next saved state, false
// once pstate/position have been set up to resume matching.
//
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool r)
{
   saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state);

   // if we have a match, just discard this state:
   if(r)
   {
      destroy_single_repeat();
      return true;
   }

   const re_repeat* rep = pmp->rep;
   std::size_t count = pmp->count;
   pstate = rep->next.p;
   const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
   position = pmp->last_position;

   BOOST_ASSERT(rep->type == syntax_element_short_set_rep);
   BOOST_ASSERT(rep->next.p != 0);
   BOOST_ASSERT(rep->alt.p != 0);
   BOOST_ASSERT(rep->next.p->type == syntax_element_set);
   BOOST_ASSERT(count < rep->max);

   if(position != last)
   {
      // wind forward until we can skip out of the repeat:
      do
      {
         // a zero map entry means the (translated) character is not in the set:
         if(!map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
         {
            // failed repeat match, discard this state and look for another:
            destroy_single_repeat();
            return true;
         }
         ++count;
         ++ position;
         ++state_count;
         pstate = rep->next.p;
      }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip));
   }
   // remember where we got to if this is a leading repeat:
   if((rep->leading) && (count < rep->max))
      restart = position;
   if(position == last)
   {
      // can't repeat any more, remove the pushed state:
      destroy_single_repeat();
      if((m_match_flags & match_partial) && (position == last) && (position != search_base))
         m_has_partial_match = true;
      // at the end of the input we can only leave the repeat if what
      // follows is able to match the null string:
      if(0 == (rep->can_be_null & mask_skip))
         return true;
   }
   else if(count == rep->max)
   {
      // can't repeat any more, remove the pushed state:
      destroy_single_repeat();
      if(!can_start(*position, rep->_map, mask_skip))
         return true;
   }
   else
   {
      // more repeats are possible later on, remember how far we got:
      pmp->count = count;
      pmp->last_position = position;
   }
   // resume matching with whatever follows the repeat:
   pstate = rep->alt.p;
   return false;
}
pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const re_set_long<mask_type>* set = static_cast<const re_set_long<mask_type>*>(pstate); + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_long_set_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(position == re_is_set_member(position, last, set, re.get_data(), icase)) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++position; + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat(bool r) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + if(!r) + { + position = pmp->position; + pstate = pmp->pstate; + ++(*next_count); + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + 
+template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion(bool r) +{ + saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state); + if(!r) + { + recursion_stack[recursion_stack_position].id = pmp->recursion_id; + recursion_stack[recursion_stack_position].preturn_address = pmp->preturn_address; + recursion_stack[recursion_stack_position].results = pmp->results; + ++recursion_stack_position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop(bool r) +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + if(!r) + { + --recursion_stack_position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_pop() +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(15); + m_backup_state = pmp; +} +/* +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_pop(bool r) +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + if(!r) + { + --parenthesis_stack_position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_pop() +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_state*>(m_backup_state); + 
--pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_push(bool r) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + if(!r) + { + parenthesis_stack[parenthesis_stack_position++] = pmp->position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_push(BidiIterator p) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position<BidiIterator>(0, p, 17); + m_backup_state = pmp; +} +*/ +} // namespace re_detail +} // namespace boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + diff --git a/ext/boost/regex/v4/perl_matcher_recursive.hpp b/ext/boost/regex/v4/perl_matcher_recursive.hpp new file mode 100644 index 0000000000..48f08b7b8d --- /dev/null +++ b/ext/boost/regex/v4/perl_matcher_recursive.hpp @@ -0,0 +1,992 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. 
+ * FILE perl_matcher_common.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the recursive implementation. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4800) +#endif + +namespace boost{ +namespace re_detail{ + +/* + * backup_subex: copy of sub-expression i of a match_results taken at + * construction time. restore() writes the saved first, second and matched + * values back into the match_results it is given. + */ +template <class BidiIterator> +class backup_subex +{ + int index; + sub_match<BidiIterator> sub; +public: + template <class A> + backup_subex(const match_results<BidiIterator, A>& w, int i) + : index(i), sub(w[i], false) {} + template <class A> + void restore(match_results<BidiIterator, A>& w) + { + w.set_first(sub.first, index, index == 0); + w.set_second(sub.second, index, sub.matched, index == 0); + } + const sub_match<BidiIterator>& get() { return sub; } +}; + +/* + * match_all_states: runs the state machine from the current pstate. Each + * iteration looks up the handler for pstate->type in s_match_vtable, bumps + * state_count and calls it; the loop itself never changes pstate, the + * handlers do. Returns true once pstate is null, false as soon as a handler + * fails (first setting m_has_partial_match when match_partial is set, + * position equals last and position differs from search_base). Calls + * raise_error with error_space if state_count already exceeds + * max_state_count on entry. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() +{ + static matcher_proc_type const s_match_vtable[30] = + { + (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), + &perl_matcher<BidiIterator, Allocator, traits>::match_endmark, + &perl_matcher<BidiIterator, Allocator, traits>::match_literal, + &perl_matcher<BidiIterator, Allocator, traits>::match_start_line, + &perl_matcher<BidiIterator, Allocator, traits>::match_end_line, + &perl_matcher<BidiIterator, Allocator, traits>::match_wild, + &perl_matcher<BidiIterator, Allocator, traits>::match_match, + &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary, + &perl_matcher<BidiIterator, Allocator, traits>::match_within_word, + &perl_matcher<BidiIterator, Allocator, traits>::match_word_start, + &perl_matcher<BidiIterator, Allocator, 
traits>::match_word_end, + &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start, + &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end, + &perl_matcher<BidiIterator, Allocator, traits>::match_backref, + &perl_matcher<BidiIterator, Allocator, traits>::match_long_set, + &perl_matcher<BidiIterator, Allocator, traits>::match_set, + &perl_matcher<BidiIterator, Allocator, traits>::match_jump, + &perl_matcher<BidiIterator, Allocator, traits>::match_alt, + &perl_matcher<BidiIterator, Allocator, traits>::match_rep, + &perl_matcher<BidiIterator, Allocator, traits>::match_combining, + &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end, + &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::boost::is_random_access_iterator<BidiIterator>::value ? 
&perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), + &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch, + &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::match_backstep, + &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, + &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, + &perl_matcher<BidiIterator, Allocator, traits>::match_recursion, + }; + + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_space); + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + return 0; + } + } + return true; +} + +/* + * match_startmark: handles an opening marker, switching on the index stored + * in the re_brace state: + * 0 no capture, just step to the next state + * -1, -2 lookahead assertion (-1 must match, -2 must not match) + * -3 independent sub-expression + * -4 conditional expression + * -5 reset of the start of $0 (\K) + * >0 capturing group; the previous capture is restored on failure + * Returns the value of the function-level r for the selected case. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() +{ + int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; + bool r = true; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert (index -1 succeeds when the sub-match succeeds, -2 when it fails): + BidiIterator old_position(position); + const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + r = match_all_states(); + pstate = next_pstate; + position = old_position; + if((r && (index != -1)) || (!r && (index != -2))) + r = false; + else + r = true; + break; + } + case -3: + { + // independent sub-expression: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast<const 
re_jump*>(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + r = match_all_states(); + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + unsigned i; + match_results<BidiIterator, Allocator> tm(*m_presult); + for(i = 0; i < tm.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: + r = match_all_states(); + // now place the stored captures back: + for(i = 0; i < tm.size(); ++i) + { + typedef typename sub_match<BidiIterator>::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = tm[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + break; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast<const re_alt*>(pstate->next.p); + BOOST_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast<const re_brace*>(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + // NOTE(review): this r is local to the block and hides the function-level r, + // which stays true, so the function returns true whichever branch is chosen: + bool r = match_all_states(); + position = saved_position; + if(negated) + r = !r; + if(r) + pstate = next_pstate; + else + pstate = alt->alt.p; + break; + } + } + case -5: + { + // Reset start of $0, since we have a \K escape + backup_subex<BidiIterator> sub(*m_presult, 0); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + r = match_all_states(); + if(r == false) + 
sub.restore(*m_presult); + break; + } + default: + { + BOOST_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + backup_subex<BidiIterator> sub(*m_presult, index); + m_presult->set_first(position, index); + pstate = pstate->next.p; + r = match_all_states(); + if(r == false) + sub.restore(*m_presult); +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(sub.get().matched && (match_extra & m_match_flags)) + ((*m_presult)[index]).get_captures().push_back(sub.get()); +#endif + } + else + { + pstate = pstate->next.p; + } + break; + } + } + return r; +} + +/* + * match_alt: chooses between the two branches of an alternative. The choice + * is driven by can_be_null at the end of input and by can_start on the + * current character otherwise. When both branches are possible the first is + * matched recursively; if that fails position is restored and pstate is set + * to the second branch. Returns false only when neither branch can start. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast<const re_alt*>(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + BidiIterator oldposition(position); + const re_syntax_base* old_pstate = jmp->alt.p; + pstate = pstate->next.p; + if(!match_all_states()) + { + pstate = old_pstate; + position = oldposition; + } + return true; + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible +} + +/* + * match_rep: general repeat. While fewer than rep->min iterations have been + * counted another iteration is mandatory. After that a greedy repeat tries + * one more iteration first and falls back to rep->alt.p, while a non-greedy + * repeat tries rep->alt.p first and only then another iteration. position + * is restored before each fallback. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_rep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + 
// + // Always copy the repeat count, so that the state is restored + // when we exit this scope: + // + repeater_count<BidiIterator> r(rep->state_id, &next_count, position); + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return match_all_states(); + } + return false; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + BidiIterator pos = position; + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + if(match_all_states()) + return true; + // failed repeat, reset position and fall through for alternative: + position = pos; + } + if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't take anything, fail... 
+ } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + // store position in case we fail: + BidiIterator pos = position; + pstate = rep->alt.p; + if(match_all_states()) + return true; + // failed alternative, reset position and fall through for repeat: + position = pos; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return match_all_states(); + } + } + return false; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * match_dot_repeat_slow: repeat of a single wildcard, consuming one + * character per match_wild() call. The greedy case consumes up to rep->max + * characters and hands over to backtrack_till_match(); the non-greedy case + * tries rep->alt.p before consuming each further character. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + unsigned count = 0; + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsory repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // normal repeat: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + if((rep->leading) && (count < rep->max)) + restart = position; + pstate = rep; + return backtrack_till_match(count - rep->min); + } + else + { + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + ++count; + pstate = psingle; + position = save_pos; + if(!match_wild()) + return false; + }while(true); + } +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * match_dot_repeat_fast: wildcard repeat that moves position with + * std::advance instead of calling match_wild() per character. Defers to + * match_dot_repeat_slow() when match_not_dot_null is set or when the mask of + * the re_dot state has no bit of match_any_mask set. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, 
traits>::match_dot_repeat_fast() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + // + // start by working out how much we can skip: + // + const re_repeat* rep = static_cast<const re_repeat*>(pstate); +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4267) +#endif + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t count = (std::min)(static_cast<std::size_t>(::boost::re_detail::distance(position, last)), static_cast<std::size_t>(greedy ? rep->max : rep->min)); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + ++position; + ++count; + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + if(save_pos == last) + return false; + position = ++save_pos; + ++count; + }while(true); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * match_char_repeat: repeat of a single literal character (the assertion + * checks that the literal has length 1). Counts matching characters up to + * rep->max (greedy) or rep->min (non-greedy), fails if fewer than rep->min + * were found, then either backtracks (greedy) or extends one character at a + * time (non-greedy) until the rest of the expression matches. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#pragma warning(disable:4267) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const 
re_repeat* rep = static_cast<const re_repeat*>(pstate); + BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length); + /* the character to match is read from directly after the re_literal structure: */ + const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1); + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t count, desired; + if(::boost::is_random_access_iterator<BidiIterator>::value) + { + desired = + (std::min)( + (std::size_t)(greedy ? rep->max : rep->min), + (std::size_t)::boost::re_detail::distance(position, last)); + count = desired; + ++desired; + if(icase) + { + while(--desired && (traits_inst.translate_nocase(*position) == what)) + { + ++position; + } + } + else + { + while(--desired && (traits_inst.translate(*position) == what)) + { + ++position; + } + } + count = count - desired; + } + else + { + count = 0; + desired = greedy ? rep->max : rep->min; + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(count < rep->min) + return false; + + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + if((traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + else + return false; // couldn't repeat even though it was the only option + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + position = save_pos; + if(position == last) + return false; + if(traits_inst.translate(*position, icase) == what) + { + ++position; + 
++count; + } + else + { + return false; + } + }while(true); +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * match_set_repeat: same scheme as match_char_repeat, but a character is + * accepted when its translated value, cast to unsigned char, indexes a + * non-zero entry of the _map table of the re_set state. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map; + unsigned count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::boost::is_random_access_iterator<BidiIterator>::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)::boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(count < rep->min) + return false; + + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + else + return false; // couldn't repeat even though it was the only option + } + 
if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + position = save_pos; + if(position == last) + return false; + if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + else + { + return false; + } + }while(true); +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * match_long_set_repeat: same scheme as match_set_repeat, but membership is + * tested with re_is_set_member(); a character is accepted when the iterator + * it returns differs from position. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type char_class_type; + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p); + unsigned count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? 
rep->max : rep->min; + if(::boost::is_random_access_iterator<BidiIterator>::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)::boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(count < rep->min) + return false; + + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + if(position != re_is_set_member(position, last, set, re.get_data(), icase)) + { + ++position; + ++count; + } + else + return false; // couldn't repeat even though it was the only option + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + position = save_pos; + if(position == last) + return false; + if(position != re_is_set_member(position, last, set, re.get_data(), icase)) + { + ++position; + ++count; + } + else + { + return false; + } + }while(true); +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * backtrack_till_match: called by the greedy repeats once they have consumed + * as much as they can. count is the number of characters that may be given + * back. Tries rep->alt.p at the current position and, while that fails and + * count is not exhausted, moves position back and tries again. Also sets + * m_has_partial_match when match_partial is set and position equals last. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count) +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if((m_match_flags & match_partial) && 
(position == last)) + m_has_partial_match = true; + + const re_repeat* rep = static_cast<const re_repeat*>(pstate); + BidiIterator backtrack = position; + if(position == last) + { + if(rep->can_be_null & mask_skip) + { + pstate = rep->alt.p; + if(match_all_states()) + return true; + } + if(count) + { + position = --backtrack; + --count; + } + else + return false; + } + do + { + while(count && !can_start(*position, rep->_map, mask_skip)) + { + --position; + --count; + ++state_count; + } + pstate = rep->alt.p; + backtrack = position; + if(match_all_states()) + return true; + if(count == 0) + return false; + position = --backtrack; + ++state_count; + --count; + }while(true); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +/* + * match_recursion: pushes a frame on recursion_stack (return state, copy of + * the current results, repeater stack and the index of the target brace), + * then matches from the recursion target with a fresh repeater_count. On + * failure the frame is popped and the saved results are restored. Fails at + * once when recursion_stack is already full. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Set new call stack: + // + if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0]))) + { + return false; + } + recursion_stack[recursion_stack_position].preturn_address = pstate->next.p; + recursion_stack[recursion_stack_position].results = *m_presult; + recursion_stack[recursion_stack_position].repeater_stack = next_count; + pstate = static_cast<const re_jump*>(pstate)->alt.p; + recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index; + ++recursion_stack_position; + + repeater_count<BidiIterator>* saved = next_count; + repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those + next_count = &r; + bool result = match_all_states(); + next_count = saved; + + if(!result) + { + --recursion_stack_position; + next_count = recursion_stack[recursion_stack_position].repeater_stack; + *m_presult = recursion_stack[recursion_stack_position].results; + return false; + } + return true; +} + +/* + * match_endmark: handles a closing marker. For index > 0 it records the end + * of the capture (unless match_nosubs is set) and, when index equals the id + * of the innermost recursion frame, pops that frame and continues matching + * at its return state. For a negative index other than -4 it clears pstate + * and returns true, which ends the enclosing match_all_states() loop. + * NOTE(review): the local saved_state in the body is assigned but never read. + */ +template <class 
BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark() +{ + int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(recursion_stack_position) + { + if(index == recursion_stack[recursion_stack_position-1].id) + { + --recursion_stack_position; + recursion_info<results_type> saved = recursion_stack[recursion_stack_position]; + const re_syntax_base* saved_state = pstate = saved.preturn_address; + repeater_count<BidiIterator>* saved_count = next_count; + next_count = saved.repeater_stack; + *m_presult = saved.results; + if(!match_all_states()) + { + recursion_stack[recursion_stack_position] = saved; + ++recursion_stack_position; + next_count = saved_count; + return false; + } + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate ? 
pstate->next.p : 0; + return true; +} + +/* + * match_match: reached the end of the expression. Inside a recursion it + * pops the frame and continues at the return state (pushing the frame again + * on failure). Otherwise it rejects the match when match_not_null, match_all + * or match_not_initial_null forbid it, records the end position, sets + * m_has_found_match and clears pstate. With match_posix the result is + * offered to m_result.maybe_assign() and false is returned unless match_any + * is set. + */ +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_match() +{ + if(recursion_stack_position) + { + BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id); + --recursion_stack_position; + const re_syntax_base* saved_state = pstate = recursion_stack[recursion_stack_position].preturn_address; + *m_presult = recursion_stack[recursion_stack_position].results; + if(!match_all_states()) + { + recursion_stack[recursion_stack_position].preturn_address = saved_state; + recursion_stack[recursion_stack_position].results = *m_presult; + ++recursion_stack_position; + return false; + } + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + + + +} // namespace re_detail +} // namespace boost +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/ext/boost/regex/v4/primary_transform.hpp b/ext/boost/regex/v4/primary_transform.hpp new file mode 100644 index 0000000000..989f500c1d --- /dev/null +++ b/ext/boost/regex/v4/primary_transform.hpp @@ -0,0 
+1,146 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: primary_transform.hpp + * VERSION: see <boost/version.hpp> + * DESCRIPTION: Heuristically determines the sort string format in use + * by the current locale. + */ + +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#define BOOST_REGEX_PRIMARY_TRANSFORM + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace boost{ + namespace re_detail{ + + +/* + * Sort key layouts reported by find_sort_syntax(): + * sort_C the transformed key equals the input + * sort_fixed the leading field has a fixed width + * sort_delim the leading field ends at a delimiter character + * sort_unknown none of the above could be recognised + */ +enum{ + sort_C, + sort_fixed, + sort_delim, + sort_unknown +}; + +template <class S, class charT> +unsigned count_chars(const S& s, charT c) +{ + // + // Count how many occurrences of character c occur + // in string s: if c is a delimiter between collation + // fields, then this should be the same value for all + // sort keys: + // + unsigned int count = 0; + for(unsigned pos = 0; pos < s.size(); ++pos) + { + if(s[pos] == c) ++count; + } + return count; +} + + +/* + * find_sort_syntax: classifies the sort keys produced by pt->transform() by + * transforming three one-character strings (lower-case a, upper-case A and a + * semicolon) and comparing the results. + * Returns sort_C with *delim = 0 when the key for a equals its input, + * sort_delim with *delim = the delimiter character, + * sort_fixed with *delim = the field width (one more than the last index at + * which the keys for a and A agree), + * or sort_unknown with *delim = 0. + */ +template <class traits, class charT> +unsigned find_sort_syntax(const traits* pt, charT* delim) +{ + // + // compare 'a' with 'A' to see how similar they are, + // should really use a-acute but we can't portably do that, + // + typedef typename traits::string_type string_type; + typedef typename traits::char_type char_type; + + // Suppress incorrect warning for MSVC + (void)pt; + + char_type a[2] = {'a', '\0', }; + string_type sa(pt->transform(a, a+1)); + if(sa == a) + { + *delim = 0; + return sort_C; + } + char_type A[2] = { 'A', '\0', }; + string_type sA(pt->transform(A, A+1)); + char_type c[2] = { ';', '\0', }; + string_type sc(pt->transform(c, c+1)); + + int pos = 0; + while((pos 
<= static_cast<int>(sa.size())) && (pos <= static_cast<int>(sA.size())) && (sa[pos] == sA[pos])) ++pos; + --pos; + /* pos is now the last index at which sa and sA agree, or -1 if they differ at index 0 */ + if(pos < 0) + { + *delim = 0; + return sort_unknown; + } + // + // at this point sa[pos] is either the end of a fixed width field + // or the character that acts as a delimiter: + // + charT maybe_delim = sa[pos]; + if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim))) + { + *delim = maybe_delim; + return sort_delim; + } + // + // OK doesn't look like a delimiter, try for fixed width field: + // + if((sa.size() == sA.size()) && (sa.size() == sc.size())) + { + // note assumes that the fixed width field is less than + // (numeric_limits<charT>::max)(), should be true for all types + // I can't imagine 127 character fields... + *delim = static_cast<charT>(++pos); + return sort_fixed; + } + // + // don't know what it is: + // + *delim = 0; + return sort_unknown; +} + + + } // namespace re_detail +} // namespace boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + + + + + + diff --git a/ext/boost/regex/v4/protected_call.hpp b/ext/boost/regex/v4/protected_call.hpp new file mode 100644 index 0000000000..ebf15ba370 --- /dev/null +++ b/ext/boost/regex/v4/protected_call.hpp @@ -0,0 +1,81 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE protected_call.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares abstract_protected_call and concrete_protected_call, + * which wrap a call to a bool-returning member function. 
+ */ + +#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP +#define BOOST_REGEX_V4_PROTECTED_CALL_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace boost{ +namespace re_detail{ + +/* + * abstract_protected_call: interface with a public, non-virtual execute() + * (declared here, defined elsewhere) and a private pure virtual call() that + * derived classes implement. Presumably execute() forwards to call(); that + * cannot be confirmed from this header. + */ +class BOOST_REGEX_DECL abstract_protected_call +{ +public: + bool BOOST_REGEX_CALL execute()const; + // this stops gcc-4 from complaining: + virtual ~abstract_protected_call(){} +private: + virtual bool call()const = 0; +}; + +/* + * concrete_protected_call: stores an object pointer and a pointer to a + * member function of T that takes no arguments and returns bool; call() + * invokes that member function on the stored object. + */ +template <class T> +class concrete_protected_call + : public abstract_protected_call +{ +public: + typedef bool (T::*proc_type)(); + concrete_protected_call(T* o, proc_type p) + : obj(o), proc(p) {} +private: + virtual bool call()const; + T* obj; + proc_type proc; +}; + +template <class T> +bool concrete_protected_call<T>::call()const +{ + return (obj->*proc)(); +} + +} // namespace re_detail +} // namespace boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/ext/boost/regex/v4/regbase.hpp b/ext/boost/regex/v4/regbase.hpp new file mode 100644 index 0000000000..2b737d5aba --- /dev/null +++ b/ext/boost/regex/v4/regbase.hpp @@ -0,0 +1,180 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regbase.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares class regbase. 
//
// class regbase
// handles error codes and flags
//
// Holds the syntax_option flag enumeration (flag_type_) and the
// restart_info enumeration.  Note that the perl-specific and the
// basic-specific option groups below deliberately reuse the same bit
// positions (bits 8 upwards), so the meaning of those bits depends on
// which main syntax group is selected in the low bits.
//
class BOOST_REGEX_DECL regbase
{
public:
   enum flag_type_
   {
      //
      // Divide the flags up into logical groups:
      // bits 0-7 indicate main syntax type.
      // bits 8-15 indicate syntax subtype.
      // bits 16-31 indicate options that are common to all
      // regex syntaxes.
      // In all cases the default is 0.
      //
      // Main syntax group:
      //
      perl_syntax_group = 0,                      // default
      basic_syntax_group = 1,                     // POSIX basic
      literal = 2,                                // all characters are literals
      main_option_type = literal | basic_syntax_group | perl_syntax_group, // everything!
      //
      // options specific to perl group:
      //
      no_bk_refs = 1 << 8,                        // \d (back-references) not allowed
      no_perl_ex = 1 << 9,                        // disable perl extensions
      no_mod_m = 1 << 10,                         // disable Perl m modifier
      mod_x = 1 << 11,                            // Perl x modifier
      mod_s = 1 << 12,                            // force s modifier on (overrides match_not_dot_newline)
      no_mod_s = 1 << 13,                         // force s modifier off (overrides match_not_dot_newline)

      //
      // options specific to basic group
      // (same bit positions as the perl group above):
      //
      no_char_classes = 1 << 8,                   // [[:CLASS:]] not allowed
      no_intervals = 1 << 9,                      // {x,y} not allowed
      bk_plus_qm = 1 << 10,                       // uses \+ and \?
      bk_vbar = 1 << 11,                          // use \| for alternatives
      emacs_ex = 1 << 12,                         // enables emacs extensions

      //
      // options common to all groups:
      //
      no_escape_in_lists = 1 << 16,               // '\' not special inside [...]
      newline_alt = 1 << 17,                      // \n is the same as |
      no_except = 1 << 18,                        // no exception on error
      failbit = 1 << 19,                          // error flag
      icase = 1 << 20,                            // characters are matched regardless of case
      nocollate = 0,                              // don't use locale specific collation (deprecated)
      collate = 1 << 21,                          // use locale specific collation
      nosubs = 1 << 22,                           // don't mark sub-expressions
      save_subexpression_location = 1 << 23,      // save subexpression locations
      no_empty_expressions = 1 << 24,             // no empty expressions allowed
      optimize = 0,                               // not really supported

      //
      // Named combinations of the flags above, one per supported
      // regular expression flavour:
      //
      basic = basic_syntax_group | collate | no_escape_in_lists,
      extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists,
      normal = 0,
      emacs = basic_syntax_group | collate | emacs_ex | bk_vbar,
      awk = no_bk_refs | collate | no_perl_ex,
      grep = basic | newline_alt,
      egrep = extended | newline_alt,
      sed = basic,
      perl = normal,
      ECMAScript = normal,
      JavaScript = normal,
      JScript = normal
   };
   // integral type used to carry (combinations of) flag_type_ values:
   typedef unsigned int flag_type;

   enum restart_info
   {
      restart_any = 0,
      restart_word = 1,
      restart_line = 2,
      restart_buf = 3,
      restart_continue = 4,
      restart_lit = 5,
      restart_fixed_lit = 6,
      restart_count = 7
   };
};

//
// provide std lib proposal compatible constants:
// each enumerator simply re-exports the regbase value of the same name.
//
namespace regex_constants{

   enum flag_type_
   {

      no_except = ::boost::regbase::no_except,
      failbit = ::boost::regbase::failbit,
      literal = ::boost::regbase::literal,
      icase = ::boost::regbase::icase,
      nocollate = ::boost::regbase::nocollate,
      collate = ::boost::regbase::collate,
      nosubs = ::boost::regbase::nosubs,
      optimize = ::boost::regbase::optimize,
      bk_plus_qm = ::boost::regbase::bk_plus_qm,
      bk_vbar = ::boost::regbase::bk_vbar,
      no_intervals = ::boost::regbase::no_intervals,
      no_char_classes = ::boost::regbase::no_char_classes,
      no_escape_in_lists = ::boost::regbase::no_escape_in_lists,
      no_mod_m = ::boost::regbase::no_mod_m,
      mod_x = ::boost::regbase::mod_x,
      mod_s = ::boost::regbase::mod_s,
      no_mod_s = ::boost::regbase::no_mod_s,
      save_subexpression_location = ::boost::regbase::save_subexpression_location,
      no_empty_expressions = ::boost::regbase::no_empty_expressions,

      basic = ::boost::regbase::basic,
      extended = ::boost::regbase::extended,
      normal = ::boost::regbase::normal,
      emacs = ::boost::regbase::emacs,
      awk = ::boost::regbase::awk,
      grep = ::boost::regbase::grep,
      egrep = ::boost::regbase::egrep,
      // the remaining names alias the enumerators declared just above:
      sed = basic,
      perl = normal,
      ECMAScript = normal,
      JavaScript = normal,
      JScript = normal
   };
   typedef ::boost::regbase::flag_type syntax_option_type;

} // namespace regex_constants
+ +#ifndef BOOST_REGEX_CONFIG_HPP +#include <boost/regex/config.hpp> +#endif +#ifndef BOOST_REGEX_WORKAROUND_HPP +#include <boost/regex/v4/regex_workaround.hpp> +#endif + +#ifndef BOOST_REGEX_FWD_HPP +#include <boost/regex_fwd.hpp> +#endif +#ifndef BOOST_REGEX_TRAITS_HPP +#include <boost/regex/regex_traits.hpp> +#endif +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#include <boost/regex/v4/error_type.hpp> +#endif +#ifndef BOOST_REGEX_V4_MATCH_FLAGS +#include <boost/regex/v4/match_flags.hpp> +#endif +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#include <boost/regex/v4/regex_raw_buffer.hpp> +#endif +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#include <boost/regex/pattern_except.hpp> +#endif + +#ifndef BOOST_REGEX_V4_CHAR_REGEX_TRAITS_HPP +#include <boost/regex/v4/char_regex_traits.hpp> +#endif +#ifndef BOOST_REGEX_V4_STATES_HPP +#include <boost/regex/v4/states.hpp> +#endif +#ifndef BOOST_REGEX_V4_REGBASE_HPP +#include <boost/regex/v4/regbase.hpp> +#endif +#ifndef BOOST_REGEX_V4_ITERATOR_TRAITS_HPP +#include <boost/regex/v4/iterator_traits.hpp> +#endif +#ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP +#include <boost/regex/v4/basic_regex.hpp> +#endif +#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP +#include <boost/regex/v4/basic_regex_creator.hpp> +#endif +#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP +#include <boost/regex/v4/basic_regex_parser.hpp> +#endif +#ifndef BOOST_REGEX_V4_SUB_MATCH_HPP +#include <boost/regex/v4/sub_match.hpp> +#endif +#ifndef BOOST_REGEX_FORMAT_HPP +#include <boost/regex/v4/regex_format.hpp> +#endif +#ifndef BOOST_REGEX_V4_MATCH_RESULTS_HPP +#include <boost/regex/v4/match_results.hpp> +#endif +#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP +#include <boost/regex/v4/protected_call.hpp> +#endif +#ifndef BOOST_REGEX_MATCHER_HPP +#include <boost/regex/v4/perl_matcher.hpp> +#endif +// +// template instances: +// +#define BOOST_REGEX_CHAR_T char +#ifdef BOOST_REGEX_NARROW_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include <boost/regex/v4/instances.hpp> +#undef 
BOOST_REGEX_CHAR_T +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif + +#ifndef BOOST_NO_WREGEX +#define BOOST_REGEX_CHAR_T wchar_t +#ifdef BOOST_REGEX_WIDE_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include <boost/regex/v4/instances.hpp> +#undef BOOST_REGEX_CHAR_T +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif +#endif + +#if !defined(BOOST_NO_WREGEX) && defined(BOOST_REGEX_HAS_OTHER_WCHAR_T) +#define BOOST_REGEX_CHAR_T unsigned short +#ifdef BOOST_REGEX_US_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include <boost/regex/v4/instances.hpp> +#undef BOOST_REGEX_CHAR_T +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif +#endif + + +namespace boost{ +#ifdef BOOST_REGEX_NO_FWD +typedef basic_regex<char, regex_traits<char> > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex<wchar_t, regex_traits<wchar_t> > wregex; +#endif +#endif + +typedef match_results<const char*> cmatch; +typedef match_results<std::string::const_iterator> smatch; +#ifndef BOOST_NO_WREGEX +typedef match_results<const wchar_t*> wcmatch; +typedef match_results<std::wstring::const_iterator> wsmatch; +#endif + +} // namespace boost +#ifndef BOOST_REGEX_MATCH_HPP +#include <boost/regex/v4/regex_match.hpp> +#endif +#ifndef BOOST_REGEX_V4_REGEX_SEARCH_HPP +#include <boost/regex/v4/regex_search.hpp> +#endif +#ifndef BOOST_REGEX_ITERATOR_HPP +#include <boost/regex/v4/regex_iterator.hpp> +#endif +#ifndef BOOST_REGEX_TOKEN_ITERATOR_HPP +#include <boost/regex/v4/regex_token_iterator.hpp> +#endif +#ifndef BOOST_REGEX_V4_REGEX_GREP_HPP +#include <boost/regex/v4/regex_grep.hpp> +#endif +#ifndef BOOST_REGEX_V4_REGEX_REPLACE_HPP +#include <boost/regex/v4/regex_replace.hpp> +#endif +#ifndef BOOST_REGEX_V4_REGEX_MERGE_HPP +#include <boost/regex/v4/regex_merge.hpp> +#endif +#ifndef BOOST_REGEX_SPLIT_HPP +#include <boost/regex/v4/regex_split.hpp> +#endif + +#endif // __cplusplus + +#endif // include + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + diff --git a/ext/boost/regex/v4/regex_format.hpp b/ext/boost/regex/v4/regex_format.hpp new file mode 100644 index 0000000000..4e95112f90 --- /dev/null +++ b/ext/boost/regex/v4/regex_format.hpp @@ -0,0 +1,829 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_FORMAT_HPP +#define BOOST_REGEX_FORMAT_HPP + + +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +// +// Forward declaration: +// + template <class BidiIterator, class Allocator = BOOST_DEDUCED_TYPENAME std::vector<sub_match<BidiIterator> >::allocator_type > +class match_results; + +namespace re_detail{ + +// +// struct trivial_format_traits: +// defines minimum localisation support for formatting +// in the case that the actual regex traits is unavailable. +// +template <class charT> +struct trivial_format_traits +{ + typedef charT char_type; + + static std::ptrdiff_t length(const charT* p) + { + return global_length(p); + } + static charT tolower(charT c) + { + return ::boost::re_detail::global_lower(c); + } + static charT toupper(charT c) + { + return ::boost::re_detail::global_upper(c); + } + static int value(const charT c, int radix) + { + int result = global_value(c); + return result >= radix ? 
-1 : result; + } + int toi(const charT*& p1, const charT* p2, int radix)const + { + return global_toi(p1, p2, radix, *this); + } +}; + +template <class OutputIterator, class Results, class traits> +class basic_regex_formatter +{ +public: + typedef typename traits::char_type char_type; + basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) + : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} + OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f); + OutputIterator format(const char_type* p1, match_flag_type f) + { + return format(p1, p1 + m_traits.length(p1), f); + } +private: + typedef typename Results::value_type sub_match_type; + enum output_state + { + output_copy, + output_next_lower, + output_next_upper, + output_lower, + output_upper, + output_none + }; + + void put(char_type c); + void put(const sub_match_type& sub); + void format_all(); + void format_perl(); + void format_escape(); + void format_conditional(); + void format_until_scope_end(); + bool handle_perl_verb(bool have_brace); + + const traits& m_traits; // the traits class for localised formatting operations + const Results& m_results; // the match_results being used. + OutputIterator m_out; // where to send output. + const char_type* m_position; // format string, current position + const char_type* m_end; // format string end + match_flag_type m_flags; // format flags to use + output_state m_state; // what to do with the next character + output_state m_restore_state; // what state to restore to. 
+ bool m_have_conditional; // we are parsing a conditional +private: + basic_regex_formatter(const basic_regex_formatter&); + basic_regex_formatter& operator=(const basic_regex_formatter&); +}; + +template <class OutputIterator, class Results, class traits> +OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f) +{ + m_position = p1; + m_end = p2; + m_flags = f; + format_all(); + return m_out; +} + +template <class OutputIterator, class Results, class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::format_all() +{ + // over and over: + while(m_position != m_end) + { + switch(*m_position) + { + case '&': + if(m_flags & ::boost::regex_constants::format_sed) + { + ++m_position; + put(m_results[0]); + break; + } + put(*m_position++); + break; + case '\\': + format_escape(); + break; + case '(': + if(m_flags & boost::regex_constants::format_all) + { + ++m_position; + bool have_conditional = m_have_conditional; + m_have_conditional = false; + format_until_scope_end(); + m_have_conditional = have_conditional; + if(m_position == m_end) + return; + BOOST_ASSERT(*m_position == static_cast<char_type>(')')); + ++m_position; // skip the closing ')' + break; + } + put(*m_position); + ++m_position; + break; + case ')': + if(m_flags & boost::regex_constants::format_all) + { + return; + } + put(*m_position); + ++m_position; + break; + case ':': + if((m_flags & boost::regex_constants::format_all) && m_have_conditional) + { + return; + } + put(*m_position); + ++m_position; + break; + case '?': + if(m_flags & boost::regex_constants::format_all) + { + ++m_position; + format_conditional(); + break; + } + put(*m_position); + ++m_position; + break; + case '$': + if((m_flags & format_sed) == 0) + { + format_perl(); + break; + } + // fall through, not a special character: + default: + put(*m_position); + ++m_position; + break; + } + } +} + +template <class OutputIterator, class Results, 
class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() +{ + // + // On entry *m_position points to a '$' character + // output the information that goes with it: + // + BOOST_ASSERT(*m_position == '$'); + // + // see if this is a trailing '$': + // + if(++m_position == m_end) + { + --m_position; + put(*m_position); + ++m_position; + return; + } + // + // OK find out what kind it is: + // + bool have_brace = false; + const char_type* save_position = m_position; + switch(*m_position) + { + case '&': + ++m_position; + put(this->m_results[0]); + break; + case '`': + ++m_position; + put(this->m_results.prefix()); + break; + case '\'': + ++m_position; + put(this->m_results.suffix()); + break; + case '$': + put(*m_position++); + break; + case '+': + if((++m_position != m_end) && (*m_position == '{')) + { + const char_type* base = ++m_position; + while((m_position != m_end) && (*m_position != '}')) ++m_position; + if(m_position != m_end) + { + // Named sub-expression: + put(this->m_results.named_subexpression(base, m_position)); + ++m_position; + break; + } + else + { + m_position = --base; + } + } + put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); + break; + case '{': + have_brace = true; + ++m_position; + // fall through.... 
+ default: + // see if we have a number: + { + std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + //len = (std::min)(static_cast<std::ptrdiff_t>(2), len); + int v = m_traits.toi(m_position, m_position + len, 10); + if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) + { + // Look for a Perl-5.10 verb: + if(!handle_perl_verb(have_brace)) + { + // leave the $ as is, and carry on: + m_position = --save_position; + put(*m_position); + ++m_position; + } + break; + } + // otherwise output sub v: + put(this->m_results[v]); + if(have_brace) + ++m_position; + } + } +} + +template <class OutputIterator, class Results, class traits> +bool basic_regex_formatter<OutputIterator, Results, traits>::handle_perl_verb(bool have_brace) +{ + // + // We may have a capitalised string containing a Perl action: + // + static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; + static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; + static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; + static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; + + if(have_brace && (*m_position == '^')) + ++m_position; + + int max_len = m_end - m_position; + + if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) + { + m_position += 5; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 5; + return false; + } + } + put(this->m_results[0]); + return true; + } + if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) + { + m_position += 8; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 8; + return false; + } 
+ } + put(this->m_results.prefix()); + return true; + } + if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) + { + m_position += 9; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 9; + return false; + } + } + put(this->m_results.suffix()); + return true; + } + if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) + { + m_position += 16; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 16; + return false; + } + } + put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); + return true; + } + if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) + { + m_position += 20; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 20; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) + { + m_position += 2; + if(have_brace) + { + if(*m_position == '}') + ++m_position; + else + { + m_position -= 2; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + return false; +} + +template <class OutputIterator, class Results, class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() +{ + // skip the escape and check for trailing escape: + if(++m_position == m_end) + { + put(static_cast<char_type>('\\')); + return; + } + // now switch on the escape type: + switch(*m_position) + { + case 'a': + put(static_cast<char_type>('\a')); + ++m_position; + break; + case 'f': + put(static_cast<char_type>('\f')); + ++m_position; + break; + case 'n': + put(static_cast<char_type>('\n')); + ++m_position; + break; + case 'r': + put(static_cast<char_type>('\r')); + ++m_position; + break; + case 't': + put(static_cast<char_type>('\t')); + ++m_position; + break; + case 'v': + 
put(static_cast<char_type>('\v')); + ++m_position; + break; + case 'x': + if(++m_position == m_end) + { + put(static_cast<char_type>('x')); + return; + } + // maybe have \x{ddd} + if(*m_position == static_cast<char_type>('{')) + { + ++m_position; + int val = m_traits.toi(m_position, m_end, 16); + if(val < 0) + { + // invalid value treat everything as literals: + put(static_cast<char_type>('x')); + put(static_cast<char_type>('{')); + return; + } + if(*m_position != static_cast<char_type>('}')) + { + while(*m_position != static_cast<char_type>('\\')) + --m_position; + ++m_position; + put(*m_position++); + return; + } + ++m_position; + put(static_cast<char_type>(val)); + return; + } + else + { + std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast<std::ptrdiff_t>(2), len); + int val = m_traits.toi(m_position, m_position + len, 16); + if(val < 0) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast<char_type>(val)); + } + break; + case 'c': + if(++m_position == m_end) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast<char_type>(*m_position++ % 32)); + break; + case 'e': + put(static_cast<char_type>(27)); + ++m_position; + break; + default: + // see if we have a perl specific escape: + if((m_flags & boost::regex_constants::format_sed) == 0) + { + bool breakout = false; + switch(*m_position) + { + case 'l': + ++m_position; + m_restore_state = m_state; + m_state = output_next_lower; + breakout = true; + break; + case 'L': + ++m_position; + m_state = output_lower; + breakout = true; + break; + case 'u': + ++m_position; + m_restore_state = m_state; + m_state = output_next_upper; + breakout = true; + break; + case 'U': + ++m_position; + m_state = output_upper; + breakout = true; + break; + case 'E': + ++m_position; + m_state = output_copy; + breakout = true; + break; + } + if(breakout) + break; + } + // see if we have a \n sed style backreference: + int v = m_traits.toi(m_position, 
m_position+1, 10); + if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) + { + put(m_results[v]); + break; + } + else if(v == 0) + { + // octal ecape sequence: + --m_position; + std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast<std::ptrdiff_t>(4), len); + v = m_traits.toi(m_position, m_position + len, 8); + BOOST_ASSERT(v >= 0); + put(static_cast<char_type>(v)); + break; + } + // Otherwise output the character "as is": + put(*m_position++); + break; + } +} + +template <class OutputIterator, class Results, class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional() +{ + if(m_position == m_end) + { + // oops trailing '?': + put(static_cast<char_type>('?')); + return; + } + int v; + if(*m_position == '{') + { + const char_type* base = m_position; + ++m_position; + v = m_traits.toi(m_position, m_end, 10); + if(v < 0) + { + // Try a named subexpression: + while((m_position != m_end) && (*m_position != '}')) + ++m_position; + v = m_results.named_subexpression_index(base + 1, m_position); + } + if((v < 0) || (*m_position != '}')) + { + m_position = base; + // oops trailing '?': + put(static_cast<char_type>('?')); + return; + } + // Skip trailing '}': + ++m_position; + } + else + { + std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast<std::ptrdiff_t>(2), len); + v = m_traits.toi(m_position, m_position + len, 10); + } + if(v < 0) + { + // oops not a number: + put(static_cast<char_type>('?')); + return; + } + + // output varies depending upon whether sub-expression v matched or not: + if(m_results[v].matched) + { + m_have_conditional = true; + format_all(); + m_have_conditional = false; + if((m_position != m_end) && (*m_position == static_cast<char_type>(':'))) + { + // skip the ':': + ++m_position; + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format 
the rest of this scope: + format_until_scope_end(); + // restore output state: + m_state = saved_state; + } + } + else + { + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format until ':' or ')': + m_have_conditional = true; + format_all(); + m_have_conditional = false; + // restore state: + m_state = saved_state; + if((m_position != m_end) && (*m_position == static_cast<char_type>(':'))) + { + // skip the ':': + ++m_position; + // format the rest of this scope: + format_until_scope_end(); + } + } +} + +template <class OutputIterator, class Results, class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end() +{ + do + { + format_all(); + if((m_position == m_end) || (*m_position == static_cast<char_type>(')'))) + return; + put(*m_position++); + }while(m_position != m_end); +} + +template <class OutputIterator, class Results, class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c) +{ + // write a single character to output + // according to which case translation mode we are in: + switch(this->m_state) + { + case output_none: + return; + case output_next_lower: + c = m_traits.tolower(c); + this->m_state = m_restore_state; + break; + case output_next_upper: + c = m_traits.toupper(c); + this->m_state = m_restore_state; + break; + case output_lower: + c = m_traits.tolower(c); + break; + case output_upper: + c = m_traits.toupper(c); + break; + default: + break; + } + *m_out = c; + ++m_out; +} + +template <class OutputIterator, class Results, class traits> +void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub) +{ + typedef typename sub_match_type::iterator iterator_type; + iterator_type i = sub.first; + while(i != sub.second) + { + put(*i); + ++i; + } +} + +template <class S> +class string_out_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator<std::output_iterator_tag, typename 
S::value_type> +#endif +{ + S* out; +public: + string_out_iterator(S& s) : out(&s) {} + string_out_iterator& operator++() { return *this; } + string_out_iterator& operator++(int) { return *this; } + string_out_iterator& operator*() { return *this; } + string_out_iterator& operator=(typename S::value_type v) + { + out->append(1, v); + return *this; + } + +#ifdef BOOST_NO_STD_ITERATOR + typedef std::ptrdiff_t difference_type; + typedef typename S::value_type value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +#endif +}; + +template <class OutputIterator, class Iterator, class Alloc, class charT, class traits> +OutputIterator regex_format_imp(OutputIterator out, + const match_results<Iterator, Alloc>& m, + const charT* p1, const charT* p2, + match_flag_type flags, + const traits& t + ) +{ + if(flags & regex_constants::format_literal) + { + return re_detail::copy(p1, p2, out); + } + + re_detail::basic_regex_formatter< + OutputIterator, + match_results<Iterator, Alloc>, + traits > f(out, m, t); + return f.format(p1, p2, flags); +} + + +} // namespace re_detail + +template <class OutputIterator, class Iterator, class charT> +OutputIterator regex_format(OutputIterator out, + const match_results<Iterator>& m, + const charT* fmt, + match_flag_type flags = format_all + ) +{ + re_detail::trivial_format_traits<charT> traits; + return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits); +} + +template <class OutputIterator, class Iterator, class charT> +OutputIterator regex_format(OutputIterator out, + const match_results<Iterator>& m, + const std::basic_string<charT>& fmt, + match_flag_type flags = format_all + ) +{ + re_detail::trivial_format_traits<charT> traits; + return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); +} + +template <class Iterator, class charT> +std::basic_string<charT> regex_format(const 
match_results<Iterator>& m, + const charT* fmt, + match_flag_type flags = format_all) +{ + std::basic_string<charT> result; + re_detail::string_out_iterator<std::basic_string<charT> > i(result); + re_detail::trivial_format_traits<charT> traits; + re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits); + return result; +} + +template <class Iterator, class charT> +std::basic_string<charT> regex_format(const match_results<Iterator>& m, + const std::basic_string<charT>& fmt, + match_flag_type flags = format_all) +{ + std::basic_string<charT> result; + re_detail::string_out_iterator<std::basic_string<charT> > i(result); + re_detail::trivial_format_traits<charT> traits; + re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); + return result; +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif // BOOST_REGEX_FORMAT_HPP + + + + + + diff --git a/ext/boost/regex/v4/regex_fwd.hpp b/ext/boost/regex/v4/regex_fwd.hpp new file mode 100644 index 0000000000..3076b069ac --- /dev/null +++ b/ext/boost/regex/v4/regex_fwd.hpp @@ -0,0 +1,73 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_fwd.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Forward declares boost::basic_regex<> and + * associated typedefs. + */ + +#ifndef BOOST_REGEX_FWD_HPP_INCLUDED +#define BOOST_REGEX_FWD_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include <boost/regex/config.hpp> +#endif + +// +// define BOOST_REGEX_NO_FWD if this +// header doesn't work! 
+// +#ifdef BOOST_REGEX_NO_FWD +# ifndef BOOST_RE_REGEX_HPP +# include <boost/regex.hpp> +# endif +#else + +namespace boost{ + +template <class charT> +class cpp_regex_traits; +template <class charT> +struct c_regex_traits; +template <class charT> +class w32_regex_traits; + +#ifdef BOOST_REGEX_USE_WIN32_LOCALE +template <class charT, class implementationT = w32_regex_traits<charT> > +struct regex_traits; +#elif defined(BOOST_REGEX_USE_CPP_LOCALE) +template <class charT, class implementationT = cpp_regex_traits<charT> > +struct regex_traits; +#else +template <class charT, class implementationT = c_regex_traits<charT> > +struct regex_traits; +#endif + +template <class charT, class traits = regex_traits<charT> > +class basic_regex; + +typedef basic_regex<char, regex_traits<char> > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex<wchar_t, regex_traits<wchar_t> > wregex; +#endif + +} // namespace boost + +#endif // BOOST_REGEX_NO_FWD + +#endif + + + + diff --git a/ext/boost/regex/v4/regex_grep.hpp b/ext/boost/regex/v4/regex_grep.hpp new file mode 100644 index 0000000000..3a3d906ecb --- /dev/null +++ b/ext/boost/regex/v4/regex_grep.hpp @@ -0,0 +1,155 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_grep.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides regex_grep implementation. 
+ */ + +#ifndef BOOST_REGEX_V4_REGEX_GREP_HPP +#define BOOST_REGEX_V4_REGEX_GREP_HPP + + +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +// +// regex_grep: +// find all non-overlapping matches within the sequence first last: +// +template <class Predicate, class BidiIterator, class charT, class traits> +inline unsigned int regex_grep(Predicate foo, + BidiIterator first, + BidiIterator last, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + typedef typename match_results<BidiIterator>::allocator_type match_allocator_type; + + match_results<BidiIterator> m; + re_detail::perl_matcher<BidiIterator, match_allocator_type, traits> matcher(first, last, m, e, flags, first); + unsigned int count = 0; + while(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; // caller doesn't want to go on + if(m[0].second == last) + return count; // we've reached the end, don't try and find an extra null match. 
+ if(m.length() == 0) + { + if(m[0].second == last) + return count; + // we found a NULL-match, now try to find + // a non-NULL one at the same position: + match_results<BidiIterator, match_allocator_type> m2(m); + matcher.setf(match_not_null | match_continuous); + if(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; + } + else + { + // reset match back to where it was: + m = m2; + } + matcher.unsetf((match_not_null | match_continuous) & ~flags); + } + } + return count; +} + +// +// regex_grep convenience interfaces: +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template <class Predicate, class charT, class traits> +inline unsigned int regex_grep(Predicate foo, const charT* str, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + traits::length(str), e, flags); +} + +template <class Predicate, class ST, class SA, class charT, class traits> +inline unsigned int regex_grep(Predicate foo, const std::basic_string<charT, ST, SA>& s, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} +#else // partial specialisation +inline unsigned int regex_grep(bool (*foo)(const cmatch&), const char* str, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + regex::traits_type::length(str), e, flags); +} +#ifndef BOOST_NO_WREGEX +inline unsigned int regex_grep(bool (*foo)(const wcmatch&), const wchar_t* str, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + wregex::traits_type::length(str), e, flags); +} +#endif +inline unsigned int regex_grep(bool (*foo)(const match_results<std::string::const_iterator>&), const std::string& s, + 
const regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} +#if !defined(BOOST_NO_WREGEX) +inline unsigned int regex_grep(bool (*foo)(const match_results<std::basic_string<wchar_t>::const_iterator>&), + const std::basic_string<wchar_t>& s, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} +#endif +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V4_REGEX_GREP_HPP + diff --git a/ext/boost/regex/v4/regex_iterator.hpp b/ext/boost/regex/v4/regex_iterator.hpp new file mode 100644 index 0000000000..c2f2c49f2e --- /dev/null +++ b/ext/boost/regex/v4/regex_iterator.hpp @@ -0,0 +1,201 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_iterator.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides regex_iterator implementation. 
+ */ + +#ifndef BOOST_REGEX_V4_REGEX_ITERATOR_HPP +#define BOOST_REGEX_V4_REGEX_ITERATOR_HPP + +#include <boost/shared_ptr.hpp> + +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template <class BidirectionalIterator, + class charT, + class traits> +class regex_iterator_implementation +{ + typedef basic_regex<charT, traits> regex_type; + + match_results<BidirectionalIterator> what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + bool init(BidirectionalIterator first) + { + base = first; + return regex_search(first, end, what, re, flags); + } + bool compare(const regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results<BidirectionalIterator>& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length()) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + regex_iterator_implementation& operator=(const regex_iterator_implementation&); +}; + +template <class BidirectionalIterator, + class charT = BOOST_DEDUCED_TYPENAME 
re_detail::regex_iterator_traits<BidirectionalIterator>::value_type, + class traits = regex_traits<charT> > +class regex_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + match_results<BidirectionalIterator>, + typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type, + const match_results<BidirectionalIterator>*, + const match_results<BidirectionalIterator>& > +#endif +{ +private: + typedef regex_iterator_implementation<BidirectionalIterator, charT, traits> impl; + typedef shared_ptr<impl> pimpl; +public: + typedef basic_regex<charT, traits> regex_type; + typedef match_results<BidirectionalIterator> value_type; + typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_iterator(){} + regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + regex_iterator(const regex_iterator& that) + : pdata(that.pdata) {} + regex_iterator& operator=(const regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_iterator operator++(int) + { + regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; 
+ + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_iterator<const char*> cregex_iterator; +typedef regex_iterator<std::string::const_iterator> sregex_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_iterator<const wchar_t*> wcregex_iterator; +typedef regex_iterator<std::wstring::const_iterator> wsregex_iterator; +#endif + +// make_regex_iterator: +template <class charT, class traits> +inline regex_iterator<const charT*, charT, traits> make_regex_iterator(const charT* p, const basic_regex<charT, traits>& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator<const charT*, charT, traits>(p, p+traits::length(p), e, m); +} +template <class charT, class traits, class ST, class SA> +inline regex_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_iterator(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>(p.begin(), p.end(), e, m); +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V4_REGEX_ITERATOR_HPP + diff --git a/ext/boost/regex/v4/regex_match.hpp b/ext/boost/regex/v4/regex_match.hpp new file mode 100644 index 0000000000..e947a15225 --- /dev/null +++ b/ext/boost/regex/v4/regex_match.hpp @@ -0,0 +1,382 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Regular expression matching algorithms. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + + +#ifndef BOOST_REGEX_MATCH_HPP +#define BOOST_REGEX_MATCH_HPP + +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +// +// proc regex_match +// returns true if the specified regular expression matches +// the whole of the input. Fills in what matched in m. +// +template <class BidiIterator, class Allocator, class charT, class traits> +bool regex_match(BidiIterator first, BidiIterator last, + match_results<BidiIterator, Allocator>& m, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + re_detail::perl_matcher<BidiIterator, Allocator, traits> matcher(first, last, m, e, flags, first); + return matcher.match(); +} +template <class iterator, class charT, class traits> +bool regex_match(iterator first, iterator last, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + match_results<iterator> m; + return regex_match(first, last, m, e, flags | regex_constants::match_any); +} +// +// query_match convenience interfaces: +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template <class charT, class Allocator, class traits> +inline bool regex_match(const charT* str, + match_results<const charT*, Allocator>& m, + const basic_regex<charT, traits>& e, + match_flag_type flags = 
match_default) +{ + return regex_match(str, str + traits::length(str), m, e, flags); +} + +template <class ST, class SA, class Allocator, class charT, class traits> +inline bool regex_match(const std::basic_string<charT, ST, SA>& s, + match_results<typename std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +template <class charT, class traits> +inline bool regex_match(const charT* str, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + match_results<const charT*> m; + return regex_match(str, str + traits::length(str), m, e, flags | regex_constants::match_any); +} + +template <class ST, class SA, class charT, class traits> +inline bool regex_match(const std::basic_string<charT, ST, SA>& s, + const basic_regex<charT, traits>& e, + match_flag_type flags = match_default) +{ + typedef typename std::basic_string<charT, ST, SA>::const_iterator iterator; + match_results<iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#else // partial ordering +inline bool regex_match(const char* str, + cmatch& m, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const regex& e, + match_flag_type flags = match_default) +{ + match_results<const char*> m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const char* str, + cmatch& m, + const basic_regex<char, cpp_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const basic_regex<char, cpp_regex_traits<char> >& e, 
+ match_flag_type flags = match_default) +{ + match_results<const char*> m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const char* str, + cmatch& m, + const basic_regex<char, c_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const basic_regex<char, c_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + match_results<const char*> m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const char* str, + cmatch& m, + const basic_regex<char, w32_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const basic_regex<char, w32_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + match_results<const char*> m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +#ifndef BOOST_NO_WREGEX +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const wregex& e, + match_flag_type flags = match_default) +{ + match_results<const wchar_t*> m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const basic_regex<wchar_t, cpp_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str 
+ wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const basic_regex<wchar_t, cpp_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + match_results<const wchar_t*> m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const basic_regex<wchar_t, c_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const basic_regex<wchar_t, c_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + match_results<const wchar_t*> m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const basic_regex<wchar_t, w32_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const basic_regex<wchar_t, w32_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + match_results<const wchar_t*> m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +#endif +inline bool regex_match(const std::string& s, + smatch& m, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const regex& e, + match_flag_type flags = match_default) +{ + match_results<std::string::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const 
std::string& s, + smatch& m, + const basic_regex<char, cpp_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const basic_regex<char, cpp_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + match_results<std::string::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const std::string& s, + smatch& m, + const basic_regex<char, c_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const basic_regex<char, c_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + match_results<std::string::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const std::string& s, + smatch& m, + const basic_regex<char, w32_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const basic_regex<char, w32_regex_traits<char> >& e, + match_flag_type flags = match_default) +{ + match_results<std::string::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +#if !defined(BOOST_NO_WREGEX) +inline bool regex_match(const std::basic_string<wchar_t>& s, + match_results<std::basic_string<wchar_t>::const_iterator>& m, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string<wchar_t>& s, + const wregex& e, + match_flag_type flags = match_default) +{ + match_results<std::basic_string<wchar_t>::const_iterator> m; + 
return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const std::basic_string<wchar_t>& s, + match_results<std::basic_string<wchar_t>::const_iterator>& m, + const basic_regex<wchar_t, cpp_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string<wchar_t>& s, + const basic_regex<wchar_t, cpp_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + match_results<std::basic_string<wchar_t>::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const std::basic_string<wchar_t>& s, + match_results<std::basic_string<wchar_t>::const_iterator>& m, + const basic_regex<wchar_t, c_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string<wchar_t>& s, + const basic_regex<wchar_t, c_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + match_results<std::basic_string<wchar_t>::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const std::basic_string<wchar_t>& s, + match_results<std::basic_string<wchar_t>::const_iterator>& m, + const basic_regex<wchar_t, w32_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string<wchar_t>& s, + const basic_regex<wchar_t, w32_regex_traits<wchar_t> >& e, + match_flag_type flags = match_default) +{ + match_results<std::basic_string<wchar_t>::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif 
+#endif
+
+#endif
+
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_SUFFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+} // namespace boost
+
+#endif   // BOOST_REGEX_MATCH_HPP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/boost/regex/v4/regex_merge.hpp b/ext/boost/regex/v4/regex_merge.hpp
new file mode 100644
index 0000000000..404ca77501
--- /dev/null
+++ b/ext/boost/regex/v4/regex_merge.hpp
@@ -0,0 +1,93 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the 
+ * Boost Software License, Version 1.0. (See accompanying file 
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         regex_merge.hpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Provides the regex_merge search and replace functions,
+  *                each of which simply forwards to regex_replace.
+  *                Note this is an internal header file included
+  *                by regex.hpp, do not include on its own.
+  */
+
+#ifndef BOOST_REGEX_V4_REGEX_MERGE_HPP
+#define BOOST_REGEX_V4_REGEX_MERGE_HPP
+
+
+namespace boost{
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_PREFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+//
+// regex_merge (output iterator, C-string format):
+// forwards all of its arguments unchanged to regex_replace.
+//
+template <class OutputIterator, class Iterator, class traits, class charT>
+inline OutputIterator regex_merge(OutputIterator out,
+                         Iterator first,
+                         Iterator last,
+                         const basic_regex<charT, traits>& e, 
+                         const charT* fmt, 
+                         match_flag_type flags = match_default)
+{
+   return regex_replace(out, first, last, e, fmt, flags);
+}
+
+//
+// regex_merge (output iterator, std::basic_string format):
+// hands fmt.c_str() to the C-string overload above.
+//
+template <class OutputIterator, class Iterator, class traits, class charT>
+inline OutputIterator regex_merge(OutputIterator out,
+                         Iterator first,
+                         Iterator last,
+                         const basic_regex<charT, traits>& e, 
+                         const std::basic_string<charT>& fmt,
+                         match_flag_type flags = match_default)
+{
+   return regex_merge(out, first, last, e, fmt.c_str(), flags);
+}
+
+//
+// regex_merge (string in, string out, C-string format):
+// returns whatever regex_replace(s, e, fmt, flags) returns.
+//
+template <class traits, class charT>
+inline std::basic_string<charT> regex_merge(const std::basic_string<charT>& s,
+                         const basic_regex<charT, traits>& e, 
+                         const charT* fmt,
+                         match_flag_type flags = match_default)
+{
+   return regex_replace(s, e, fmt, flags);
+}
+
+//
+// regex_merge (string in, string out, std::basic_string format):
+// returns whatever regex_replace(s, e, fmt, flags) returns.
+//
+template <class traits, class charT>
+inline std::basic_string<charT> regex_merge(const std::basic_string<charT>& s,
+                         const basic_regex<charT, traits>& e, 
+                         const std::basic_string<charT>& fmt,
+                         match_flag_type flags = match_default)
+{
+   return regex_replace(s, e, fmt, flags);
+}
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_SUFFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+} // namespace boost
+
+#endif  // BOOST_REGEX_V4_REGEX_MERGE_HPP
+
+
diff --git a/ext/boost/regex/v4/regex_raw_buffer.hpp b/ext/boost/regex/v4/regex_raw_buffer.hpp
new file mode 100644
index 0000000000..52d45a250b
--- /dev/null
+++ 
b/ext/boost/regex/v4/regex_raw_buffer.hpp @@ -0,0 +1,210 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_raw_buffer.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Raw character buffer for regex code. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#define BOOST_REGEX_RAW_BUFFER_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include <boost/regex/config.hpp> +#endif + +#include <algorithm> +#include <cstddef> + +namespace boost{ + namespace re_detail{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +struct empty_padding{}; + +union padding +{ + void* p; + unsigned int i; +}; + +template <int N> +struct padding3 +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<2> +{ + enum{ + padding_size = 2, + padding_mask = 1 + }; +}; + +template<> +struct padding3<4> +{ + enum{ + padding_size = 4, + padding_mask = 3 + }; +}; + +template<> +struct padding3<8> +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<16> +{ + enum{ + padding_size = 16, + padding_mask = 15 + }; +}; + +enum{ + padding_size = padding3<sizeof(padding)>::padding_size, + padding_mask = padding3<sizeof(padding)>::padding_mask +}; + +// +// class raw_storage +// basically this is a simplified vector<unsigned char> +// this is used by basic_regex for expression storage +// + +class BOOST_REGEX_DECL raw_storage +{ +public: + typedef std::size_t size_type; + typedef unsigned char* pointer; +private: + 
pointer last, start, end; +public: + + raw_storage(); + raw_storage(size_type n); + + ~raw_storage() + { + ::operator delete(start); + } + + void BOOST_REGEX_CALL resize(size_type n); + + void* BOOST_REGEX_CALL extend(size_type n) + { + if(size_type(last - end) < n) + resize(n + (end - start)); + register pointer result = end; + end += n; + return result; + } + + void* BOOST_REGEX_CALL insert(size_type pos, size_type n); + + size_type BOOST_REGEX_CALL size() + { + return end - start; + } + + size_type BOOST_REGEX_CALL capacity() + { + return last - start; + } + + void* BOOST_REGEX_CALL data()const + { + return start; + } + + size_type BOOST_REGEX_CALL index(void* ptr) + { + return static_cast<pointer>(ptr) - static_cast<pointer>(data()); + } + + void BOOST_REGEX_CALL clear() + { + end = start; + } + + void BOOST_REGEX_CALL align() + { + // move end up to a boundary: + end = start + (((end - start) + padding_mask) & ~padding_mask); + } + void swap(raw_storage& that) + { + std::swap(start, that.start); + std::swap(end, that.end); + std::swap(last, that.last); + } +}; + +inline raw_storage::raw_storage() +{ + last = start = end = 0; +} + +inline raw_storage::raw_storage(size_type n) +{ + start = end = static_cast<pointer>(::operator new(n)); + BOOST_REGEX_NOEH_ASSERT(start) + last = start + n; +} + + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail +} // namespace boost + +#endif + + + + + + diff --git a/ext/boost/regex/v4/regex_replace.hpp b/ext/boost/regex/v4/regex_replace.hpp new file mode 100644 index 0000000000..c4544c05b9 --- /dev/null +++ b/ext/boost/regex/v4/regex_replace.hpp @@ -0,0 +1,122 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V4_REGEX_REPLACE_HPP +#define BOOST_REGEX_V4_REGEX_REPLACE_HPP + + +namespace boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template <class OutputIterator, class BidirectionalIterator, class traits, class charT> +OutputIterator regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const basic_regex<charT, traits>& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + regex_iterator<BidirectionalIterator, charT, traits> i(first, last, e, flags); + regex_iterator<BidirectionalIterator, charT, traits> j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(first, last, out); + } + else + { + BidirectionalIterator last_m(first); + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(i->prefix().first, i->prefix().second, out); + out = i->format(out, fmt, flags, e); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(last_m, last, out); + } + return out; +} + +template <class OutputIterator, class Iterator, class traits, class charT> +inline OutputIterator regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex<charT, traits>& e, + const std::basic_string<charT>& fmt, + match_flag_type flags = 
match_default) +{ + return regex_replace(out, first, last, e, fmt.c_str(), flags); +} + +template <class traits, class charT> +std::basic_string<charT> regex_replace(const std::basic_string<charT>& s, + const basic_regex<charT, traits>& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + std::basic_string<charT> result; + re_detail::string_out_iterator<std::basic_string<charT> > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +template <class traits, class charT> +std::basic_string<charT> regex_replace(const std::basic_string<charT>& s, + const basic_regex<charT, traits>& e, + const std::basic_string<charT>& fmt, + match_flag_type flags = match_default) +{ + std::basic_string<charT> result; + re_detail::string_out_iterator<std::basic_string<charT> > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V4_REGEX_REPLACE_HPP + + diff --git a/ext/boost/regex/v4/regex_search.hpp b/ext/boost/regex/v4/regex_search.hpp new file mode 100644 index 0000000000..cf5579d2c7 --- /dev/null +++ b/ext/boost/regex/v4/regex_search.hpp @@ -0,0 +1,217 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_search.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides regex_search implementation. 
 */

#ifndef BOOST_REGEX_V4_REGEX_SEARCH_HPP
#define BOOST_REGEX_V4_REGEX_SEARCH_HPP


namespace boost{

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

//
// regex_search over an iterator range, filling in m:
// forwards to the overload below, passing "first" as the base iterator.
//
template <class BidiIterator, class Allocator, class charT, class traits>
bool regex_search(BidiIterator first, BidiIterator last, 
                  match_results<BidiIterator, Allocator>& m, 
                  const basic_regex<charT, traits>& e, 
                  match_flag_type flags = match_default)
{
   return regex_search(first, last, m, e, flags, first);
}

//
// regex_search over an iterator range with an explicit base iterator:
// returns false straight away if the expression has failbit set in its
// flags, otherwise returns the result of perl_matcher::find().
//
template <class BidiIterator, class Allocator, class charT, class traits>
bool regex_search(BidiIterator first, BidiIterator last, 
                  match_results<BidiIterator, Allocator>& m, 
                  const basic_regex<charT, traits>& e, 
                  match_flag_type flags,
                  BidiIterator base)
{
   if(e.flags() & regex_constants::failbit)
      return false;

   re_detail::perl_matcher<BidiIterator, Allocator, traits> matcher(first, last, m, e, flags, base);
   return matcher.find();
}

//
// regex_search convenience interfaces:
#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
//
// this isn't really a partial specialisation, but template function
// overloading - if the compiler doesn't support partial specialisation
// then it really won't support this either:
//
// null-terminated string: the end of the range is str + traits::length(str).
template <class charT, class Allocator, class traits>
inline bool regex_search(const charT* str, 
                        match_results<const charT*, Allocator>& m, 
                        const basic_regex<charT, traits>& e, 
                        match_flag_type flags = match_default)
{
   return regex_search(str, str + traits::length(str), m, e, flags);
}

// std::basic_string: searches [s.begin(), s.end()).
template <class ST, class SA, class Allocator, class charT, class traits>
inline bool regex_search(const std::basic_string<charT, ST, SA>& s, 
                 match_results<typename std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, 
                 const basic_regex<charT, traits>& e, 
                 match_flag_type flags = match_default)
{
   return regex_search(s.begin(), s.end(), m, e, flags);
}
#else  // partial overloads:
// non-template equivalents for compilers without function template ordering.
inline bool regex_search(const char* str, 
                        cmatch& m, 
                        const regex& e, 
                        match_flag_type flags = match_default)
{
   return regex_search(str, str + regex::traits_type::length(str), m, e, flags);
}
// no match_results supplied: a local one is used and match_any is added to the flags.
inline bool regex_search(const char* first, const char* last, 
                  const regex& e, 
                  match_flag_type flags = match_default)
{
   cmatch m;
   return regex_search(first, last, m, e, flags | regex_constants::match_any);
}

#ifndef BOOST_NO_WREGEX
inline bool regex_search(const wchar_t* str, 
                        wcmatch& m, 
                        const wregex& e, 
                        match_flag_type flags = match_default)
{
   return regex_search(str, str + wregex::traits_type::length(str), m, e, flags);
}
inline bool regex_search(const wchar_t* first, const wchar_t* last, 
                  const wregex& e, 
                  match_flag_type flags = match_default)
{
   wcmatch m;
   return regex_search(first, last, m, e, flags | regex_constants::match_any);
}
#endif
inline bool regex_search(const std::string& s, 
                        smatch& m,
                        const regex& e, 
                        match_flag_type flags = match_default)
{
   return regex_search(s.begin(), s.end(), m, e, flags);
}
#if !defined(BOOST_NO_WREGEX)
inline bool regex_search(const std::basic_string<wchar_t>& s, 
                        wsmatch& m,
                        const wregex& e, 
                        match_flag_type flags = match_default)
{
   return regex_search(s.begin(), s.end(), m, e, flags);
}
#endif

#endif

//
// regex_search over an iterator range without a match_results argument:
// a local match_results is used, and match_any is added to the flags
// passed to the matcher.  Returns false if the expression has failbit set.
//
template <class BidiIterator, class charT, class traits>
bool regex_search(BidiIterator first, BidiIterator last, 
                  const basic_regex<charT, traits>& e, 
                  match_flag_type flags = match_default)
{
   if(e.flags() & regex_constants::failbit)
      return false;

   match_results<BidiIterator> m;
   typedef typename match_results<BidiIterator>::allocator_type match_alloc_type;
   re_detail::perl_matcher<BidiIterator, match_alloc_type, traits> matcher(first, last, m, e, flags | regex_constants::match_any, first);
   return matcher.find();
}

#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING

// null-terminated string, no match_results:
template <class charT, class traits>
inline bool regex_search(const charT* str, 
                        const basic_regex<charT, traits>& e, 
                        match_flag_type flags = match_default)
{
   return regex_search(str, str + traits::length(str), e, flags);
}

// std::basic_string, no match_results:
template <class ST, class SA, class charT, class traits>
inline bool regex_search(const std::basic_string<charT, ST, SA>& s, 
                 const basic_regex<charT, traits>& e, 
                 match_flag_type flags = match_default)
{
   return regex_search(s.begin(), s.end(), e, flags);
}
#else  // non-template function overloads
// each of these uses a local match_results and adds match_any to the flags.
inline bool regex_search(const char* str, 
                        const regex& e, 
                        match_flag_type flags = match_default)
{
   cmatch m;
   return regex_search(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any);
}
#ifndef BOOST_NO_WREGEX
inline bool regex_search(const wchar_t* str, 
                        const wregex& e, 
                        match_flag_type flags = match_default)
{
   wcmatch m;
   return regex_search(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any);
}
#endif
inline bool regex_search(const std::string& s, 
                        const regex& e, 
                        match_flag_type flags = match_default)
{
   smatch m;
   return regex_search(s.begin(), s.end(), m, e, flags | regex_constants::match_any);
}
#if !defined(BOOST_NO_WREGEX)
inline bool regex_search(const std::basic_string<wchar_t>& s, 
                        const wregex& e, 
                        match_flag_type flags = match_default)
{
   wsmatch m;
   return regex_search(s.begin(), s.end(), m, e, flags | regex_constants::match_any);
}

#endif // BOOST_NO_WREGEX

#endif // partial overload

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

} // namespace boost

#endif  // BOOST_REGEX_V4_REGEX_SEARCH_HPP


diff --git a/ext/boost/regex/v4/regex_split.hpp b/ext/boost/regex/v4/regex_split.hpp new file mode 100644 index 0000000000..a7ae350f4a --- /dev/null +++
b/ext/boost/regex/v4/regex_split.hpp @@ -0,0 +1,172 @@
/*
 *
 * Copyright (c) 1998-2002
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         regex_split.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Implements regex_split and associated functions.
  *                Note this is an internal header file included
  *                by regex.hpp, do not include on its own.
  */

#ifndef BOOST_REGEX_SPLIT_HPP
#define BOOST_REGEX_SPLIT_HPP

namespace boost{

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

#ifdef BOOST_MSVC
#  pragma warning(push)
#  pragma warning(disable: 4800)
#endif

namespace re_detail{

//
// get_default_expression:
// returns a function-local static regex built from the text "\s+";
// the argument is used only to select charT.
//
template <class charT>
const basic_regex<charT>& get_default_expression(charT)
{
   static const charT expression_text[4] = { '\\', 's', '+', '\00', };
   static const basic_regex<charT> e(expression_text);
   return e;
}

//
// split_pred:
// the predicate regex_split passes to regex_grep.  It holds pointers to the
// caller's "last position" iterator, output iterator and remaining-split
// counter, and remembers the counter's initial value.
//
template <class OutputIterator, class charT, class Traits1, class Alloc1>
class split_pred
{
   typedef std::basic_string<charT, Traits1, Alloc1> string_type;
   typedef typename string_type::const_iterator iterator_type;
   iterator_type* p_last;
   OutputIterator* p_out;
   std::size_t* p_max;
   std::size_t initial_max;
public:
   split_pred(iterator_type* a, OutputIterator* b, std::size_t* c)
      : p_last(a), p_out(b), p_max(c), initial_max(*c) {}

   bool operator()(const match_results<iterator_type>& what);
};

//
// called with each match: records the end of the match in *p_last, writes
// one or more strings to the output iterator, decrementing *p_max for each
// one, and returns false once *p_max reaches zero.
//
template <class OutputIterator, class charT, class Traits1, class Alloc1>
bool split_pred<OutputIterator, charT, Traits1, Alloc1>::operator()
   (const match_results<iterator_type>& what)
{
   *p_last = what[0].second;
   if(what.size() > 1)
   {
      // output sub-expressions only:
      for(unsigned i = 1; i < what.size(); ++i)
      {
         *(*p_out) = what.str(i);
         ++(*p_out);
         if(0 == --*p_max) return false;
      }
      return *p_max != 0;
   }
   else
   {
      // output $` only if it's not-null or not at the start of the input:
      const sub_match<iterator_type>& sub = what[-1];
      if((sub.first != sub.second) || (*p_max != initial_max))
      {
         *(*p_out) = sub.str();
         ++(*p_out);
         // the decremented count is converted to bool: false once it hits zero
         return --*p_max;
      }
   }
   //
   // initial null, do nothing:
   return true;
}

} // namespace re_detail

//
// regex_split:
// runs regex_grep over s with a split_pred, writing strings to out until
// the input is exhausted or max_split strings have been written.  The
// processed part of s is then erased from s.  Returns the number of strings
// written (init_size - max_split).
//
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
std::size_t regex_split(OutputIterator out,
                   std::basic_string<charT, Traits1, Alloc1>& s, 
                   const basic_regex<charT, Traits2>& e,
                   match_flag_type flags,
                   std::size_t max_split)
{
   typedef typename std::basic_string<charT, Traits1, Alloc1>::const_iterator  ci_t;
   typedef typename match_results<ci_t>::allocator_type                        match_allocator;
   ci_t last = s.begin();
   std::size_t init_size = max_split;
   // pred updates last, out and max_split through the pointers it is given:
   re_detail::split_pred<OutputIterator, charT, Traits1, Alloc1> pred(&last, &out, &max_split);
   ci_t i, j;
   i = s.begin();
   j = s.end();
   regex_grep(pred, i, j, e, flags);
   //
   // if there is still input left, do a final push as long as max_split
   // is not exhausted, and we're not splitting sub-expressions rather 
   // than whitespace:
   if(max_split && (last != s.end()) && (e.mark_count() == 1))
   {
      *out = std::basic_string<charT, Traits1, Alloc1>((ci_t)last, (ci_t)s.end());
      ++out;
      last = s.end();
      --max_split;
   }
   //
   // delete from the string everything that has been processed so far:
   s.erase(0, last - s.begin());
   //
   // return the number of new records pushed:
   return init_size - max_split;
}

// as above, with max_split set to UINT_MAX:
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
inline std::size_t regex_split(OutputIterator out,
                   std::basic_string<charT, Traits1, Alloc1>& s, 
                   const basic_regex<charT, Traits2>& e,
                   match_flag_type flags = match_default)
{
   return regex_split(out, s, e, flags, UINT_MAX);
}

// as above, splitting on the default expression returned by get_default_expression:
template <class OutputIterator, class charT, class Traits1, class Alloc1>
inline std::size_t regex_split(OutputIterator out,
                   std::basic_string<charT, Traits1, Alloc1>& s)
{
   return regex_split(out, s, re_detail::get_default_expression(charT(0)), match_default, UINT_MAX);
}

#ifdef BOOST_MSVC
#  pragma warning(pop)
#endif

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

} // namespace boost

#endif


diff --git a/ext/boost/regex/v4/regex_token_iterator.hpp b/ext/boost/regex/v4/regex_token_iterator.hpp new file mode 100644 index 0000000000..4e8bc36fef --- /dev/null +++ b/ext/boost/regex/v4/regex_token_iterator.hpp @@ -0,0 +1,342 @@
/*
 *
 * Copyright (c) 2003
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         regex_token_iterator.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Provides regex_token_iterator implementation.
  */

#ifndef BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP
#define BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP

#include <boost/shared_ptr.hpp>
#include <boost/detail/workaround.hpp>
#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\
      || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \
      || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003))
//
// Borland C++ Builder 6, and Visual C++ 6,
// can't cope with the array template constructor
// so we have a template member that will accept any type as 
// argument, and then assert that it really is an array:
//
#include <boost/static_assert.hpp>
#include <boost/type_traits/is_array.hpp>
#endif

namespace boost{

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
#if BOOST_WORKAROUND(BOOST_MSVC, > 1300)
#  pragma warning(push)
#  pragma warning(disable:4700)
#endif

//
// regex_token_iterator_implementation:
// holds the state behind a regex_token_iterator: a copy of the expression,
// the current match, the list of sub-expression indexes to enumerate and
// the index (N) of the one currently being returned.
//
template <class BidirectionalIterator,
          class charT,
          class traits>
class regex_token_iterator_implementation 
{
   typedef basic_regex<charT, traits> regex_type;
   typedef sub_match<BidirectionalIterator>      value_type;

   match_results<BidirectionalIterator> what;   // current match
   BidirectionalIterator                base;    // start of search area
   BidirectionalIterator                end;    // end of search area
   const regex_type                     re;    // the expression
   match_flag_type                      flags;  // match flags
   value_type                           result; // the current string result
   int                                  N;      // the current sub-expression being enumerated
   std::vector<int>                     subs;   // the sub-expressions to enumerate

public:
   // enumerate a single sub-expression:
   regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f)
      : end(last), re(*p), flags(f){ subs.push_back(sub); }
   // enumerate the sub-expressions listed in v:
   regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector<int>& v, match_flag_type f)
      : end(last), re(*p), flags(f), subs(v){}
#if !BOOST_WORKAROUND(__HP_aCC, < 60700)
#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\
      || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \
      || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \
      || BOOST_WORKAROUND(__HP_aCC, < 60700)
   // workaround form: accepts any T and statically asserts that it is an array.
   template <class T>
   regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const T& submatches, match_flag_type f)
      : end(last), re(*p), flags(f)
   {
      // assert that T really is an array:
      BOOST_STATIC_ASSERT(::boost::is_array<T>::value);
      const std::size_t array_size = sizeof(T) / sizeof(submatches[0]);
      for(std::size_t i = 0; i < array_size; ++i)
      {
         subs.push_back(submatches[i]);
      }
   }
#else
   // enumerate the sub-expressions listed in an array of int:
   template <std::size_t CN>
   regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f)
      : end(last), re(*p), flags(f)
   {
      for(std::size_t i = 0; i < CN; ++i)
      {
         subs.push_back(submatches[i]);
      }
   }
#endif
#endif
   //
   // init: performs the first search starting at "first".
   // Returns true if there is a first token: either a match was found, or
   // no match was found but the first requested sub-expression is -1 and
   // the input is non-empty, in which case the whole input is the token
   // and N is set to -1.
   //
   bool init(BidirectionalIterator first)
   {
      N = 0;
      base = first;
      if(regex_search(first, end, what, re, flags, base) == true)
      {
         N = 0;
         result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]);
         return true;
      }
      else if((subs[N] == -1) && (first != end))
      {
         result.first = first;
         result.second = end;
         result.matched = (first != end);
         N = -1;
         return true;
      }
      return false;
   }
   //
   // compare: true if both objects are the same object, or if they share
   // the same expression data, end, flags and N and their current matches
   // cover the same range.
   //
   bool compare(const regex_token_iterator_implementation& that)
   {
      if(this == &that) return true;
      return (&re.get_data() == &that.re.get_data()) 
         && (end == that.end) 
         && (flags == that.flags) 
         && (N == that.N) 
         && (what[0].first == that.what[0].first) 
         && (what[0].second == that.what[0].second);
   }
   const value_type& get()
   { return result; }
   //
   // next: advances to the next token, returning false when there are no more.
   //
   bool next()
   {
      // N == -1 marks the trailing unmatched text: nothing follows it.
      if(N == -1)
         return false;
      // more sub-expressions of the current match still to return:
      if(N+1 < (int)subs.size())
      {
         ++N;
         result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
         return true;
      }
      //if(what.prefix().first != what[0].second)
      //   flags |= /*match_prev_avail |*/ regex_constants::match_not_bob;
      BidirectionalIterator last_end(what[0].second);
      // search again from the end of the last match; if that match was empty,
      // match_not_initial_null is added to the flags for this search:
      if(regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base))
      {
         N =0;
         result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
         return true;
      }
      else if((last_end != end) && (subs[0] == -1))
      {
         // no further match: the remaining input is the final token.
         N =-1;
         result.first = last_end;
         result.second = end;
         result.matched = (last_end != end);
         return true;
      }
      return false;
   }
private:
   // declared but not defined: assignment is not available.
   regex_token_iterator_implementation& operator=(const regex_token_iterator_implementation&);
};

//
// regex_token_iterator:
// forward iterator over sub_match values.  State lives in a shared_ptr to
// the implementation class; a default constructed iterator holds a null
// pointer, and the pointer is reset to null when init() or next() return
// false.
//
template <class BidirectionalIterator, 
          class charT = BOOST_DEDUCED_TYPENAME re_detail::regex_iterator_traits<BidirectionalIterator>::value_type,
          class traits = regex_traits<charT> >
class regex_token_iterator 
#ifndef BOOST_NO_STD_ITERATOR
   : public std::iterator<
         std::forward_iterator_tag, 
         sub_match<BidirectionalIterator>,
         typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type,
         const sub_match<BidirectionalIterator>*,
         const sub_match<BidirectionalIterator>& >         
#endif
{
private:
   typedef regex_token_iterator_implementation<BidirectionalIterator, charT, traits> impl;
   typedef shared_ptr<impl> pimpl;
public:
   typedef          basic_regex<charT, traits>                   regex_type;
   typedef          sub_match<BidirectionalIterator>                        value_type;
   typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type 
                                                                            difference_type;
   typedef          const value_type*                                       pointer;
   typedef          const value_type&                                       reference; 
   typedef          std::forward_iterator_tag                               iterator_category;
   
   regex_token_iterator(){}
   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
                        int submatch = 0, match_flag_type m = match_default)
                        : pdata(new impl(&re, b, submatch, m))
   {
      if(!pdata->init(a))
         pdata.reset();
   }
   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
                        const std::vector<int>& submatches, match_flag_type m = match_default)
                        : pdata(new impl(&re, b, submatches, m))
   {
      if(!pdata->init(a))
         pdata.reset();
   }
#if !BOOST_WORKAROUND(__HP_aCC, < 60700)
#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\
      || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \
      || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \
      || BOOST_WORKAROUND(__HP_aCC, < 60700)
   template <class T>
   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
                        const T& submatches, match_flag_type m = match_default)
                        : pdata(new impl(&re, b, submatches, m))
   {
      if(!pdata->init(a))
         pdata.reset();
   }
#else
   template <std::size_t N>
   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
                        const int (&submatches)[N], match_flag_type m = match_default)
                        : pdata(new impl(&re, b, submatches, m))
   {
      if(!pdata->init(a))
         pdata.reset();
   }
#endif
#endif
   // copies share the implementation object until one of them is incremented:
   regex_token_iterator(const regex_token_iterator& that)
      : pdata(that.pdata) {}
   regex_token_iterator& operator=(const regex_token_iterator& that)
   {
      pdata = that.pdata;
      return *this;
   }
   // if either side holds a null pointer, they are equal only if both do:
   bool operator==(const regex_token_iterator& that)const
   { 
      if((pdata.get() == 0) || (that.pdata.get() == 0))
         return pdata.get() == that.pdata.get();
      return pdata->compare(*(that.pdata.get())); 
   }
   bool operator!=(const regex_token_iterator& that)const
   { return !(*this == that); }
   const value_type& operator*()const
   { return pdata->get(); }
   const value_type* operator->()const
   { return &(pdata->get()); }
   // NOTE(review): pdata is dereferenced without a null check here, so this
   // appears to require a non-end iterator - confirm against callers.
   regex_token_iterator& operator++()
   {
      cow();
      if(0 == pdata->next())
      {
         pdata.reset();
      }
      return *this;
   }
   regex_token_iterator operator++(int)
   {
      regex_token_iterator result(*this);
      ++(*this);
      return result;
   }
private:

   pimpl pdata;

   void cow()
   {
      // copy-on-write
      if(pdata.get() && !pdata.unique())
      {
         pdata.reset(new impl(*(pdata.get())));
      }
   }
};

typedef regex_token_iterator<const char*> cregex_token_iterator;
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
#ifndef BOOST_NO_WREGEX
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
#endif

//
// make_regex_token_iterator:
// helpers that build a regex_token_iterator from a null-terminated string
// or a std::basic_string, with the sub-expressions given as a single int,
// an array of int, or a std::vector<int>.
//
template <class charT, class traits>
inline regex_token_iterator<const charT*, charT, traits> make_regex_token_iterator(const charT* p, const basic_regex<charT, traits>& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default)
{
   return regex_token_iterator<const charT*, charT, traits>(p, p+traits::length(p), e, submatch, m);
}
template <class charT, class traits, class ST, class SA>
inline regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default)
{
   return regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m);
}
#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
template <class charT, class traits, std::size_t N>
inline regex_token_iterator<const charT*, charT, traits> make_regex_token_iterator(const charT* p, const basic_regex<charT, traits>& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default)
{
   return regex_token_iterator<const charT*, charT, traits>(p, p+traits::length(p), e, submatch, m);
}
template <class charT, class traits, class ST, class SA, std::size_t N>
inline regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default)
{
   return regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m);
}
#endif
template <class charT, class traits>
inline regex_token_iterator<const charT*, charT, traits> make_regex_token_iterator(const charT* p, const basic_regex<charT, traits>& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default)
{
   return regex_token_iterator<const charT*, charT, traits>(p, p+traits::length(p), e, submatch, m);
}
template <class charT, class traits, class ST, class SA>
inline regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default)
{
   return regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m);
}

#if BOOST_WORKAROUND(BOOST_MSVC, > 1300)
#  pragma warning(pop)
#endif
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

} // namespace boost

#endif // BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP




diff --git a/ext/boost/regex/v4/regex_traits.hpp b/ext/boost/regex/v4/regex_traits.hpp new file mode 100644 index 0000000000..f5f0402cab --- /dev/null +++ b/ext/boost/regex/v4/regex_traits.hpp @@ -0,0 +1,189 @@
/*
 *
 * Copyright (c) 2003
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0.
(See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         regex_traits.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Declares regular expression traits classes.
  */

#ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED
#define BOOST_REGEX_TRAITS_HPP_INCLUDED

// each dependency is wrapped in a test of its own include guard, so a
// header that has already been seen is not opened again:
#ifndef BOOST_REGEX_CONFIG_HPP
#include <boost/regex/config.hpp>
#endif
#ifndef BOOST_REGEX_WORKAROUND_HPP
#include <boost/regex/v4/regex_workaround.hpp>
#endif
#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
#include <boost/regex/v4/syntax_type.hpp>
#endif
#ifndef BOOST_REGEX_ERROR_TYPE_HPP
#include <boost/regex/v4/error_type.hpp>
#endif
#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
#include <boost/regex/v4/regex_traits_defaults.hpp>
#endif
#ifndef BOOST_NO_STD_LOCALE
#  ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
#     include <boost/regex/v4/cpp_regex_traits.hpp>
#  endif
#endif
#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560)
#  ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
#     include <boost/regex/v4/c_regex_traits.hpp>
#  endif
#endif
#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
#  ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED
#     include <boost/regex/v4/w32_regex_traits.hpp>
#  endif
#endif
#ifndef BOOST_REGEX_FWD_HPP_INCLUDED
#include <boost/regex_fwd.hpp>
#endif

#include "boost/mpl/has_xxx.hpp"
#include <boost/static_assert.hpp>

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

namespace boost{

//
// class regex_traits:
// derives publicly from implementationT and adds only a default
// constructor that default-constructs that base.
//
template <class charT, class implementationT >
struct regex_traits : public implementationT
{
   regex_traits() : implementationT() {}
};

//
// class regex_traits_wrapper.
// this is what our implementation will actually store;
// it provides default implementations of the "optional"
// interfaces that we support, in addition to the
// required "standard" ones:
//
namespace re_detail{
#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !BOOST_WORKAROUND(__HP_aCC, < 60000)
BOOST_MPL_HAS_XXX_TRAIT_DEF(boost_extensions_tag)
#else
// fallback: report every type as having no boost_extensions_tag.
template<class T>
struct has_boost_extensions_tag
{
   BOOST_STATIC_CONSTANT(bool, value = false);
};
#endif

//
// default_wrapper:
// derives from BaseT and supplies the extra members below, implemented in
// terms of the library's default/global helper functions.
//
template <class BaseT>
struct default_wrapper : public BaseT
{
   typedef typename BaseT::char_type char_type;
   std::string error_string(::boost::regex_constants::error_type e)const
   {
      return ::boost::re_detail::get_default_error_string(e);
   }
   // characters outside the 7-bit range are reported as syntax_char:
   ::boost::regex_constants::syntax_type syntax_type(char_type c)const
   {
      return ((c & 0x7f) == c) ? get_default_syntax_type(static_cast<char>(c)) : ::boost::regex_constants::syntax_char;
   }
   // characters outside the 7-bit range are reported as escape_type_identity:
   ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c)const
   {
      return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast<char>(c)) : ::boost::regex_constants::escape_type_identity;
   }
   int toi(const char_type*& p1, const char_type* p2, int radix)const
   {
      return ::boost::re_detail::global_toi(p1, p2, radix, *this);
   }
   // two-argument translate: selects translate_nocase or translate by icase.
   char_type translate(char_type c, bool icase)const
   {
      return (icase ? this->translate_nocase(c) : this->translate(c));
   }
   // re-declared here so that the one-argument form is not hidden by the
   // two-argument overload above:
   char_type translate(char_type c)const
   {
      return BaseT::translate(c);
   }
   char_type tolower(char_type c)const
   {
      return ::boost::re_detail::global_lower(c);
   }
   char_type toupper(char_type c)const
   {
      return ::boost::re_detail::global_upper(c);
   }
};

//
// compute_wrapper_base:
// selects the base class for regex_traits_wrapper.  The primary template
// uses BaseT unchanged; the has_extensions == false specialisations below
// wrap it in default_wrapper.
//
template <class BaseT, bool has_extensions>
struct compute_wrapper_base
{
   typedef BaseT type;
};
#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !BOOST_WORKAROUND(__HP_aCC, < 60000)
template <class BaseT>
struct compute_wrapper_base<BaseT, false>
{
   typedef default_wrapper<BaseT> type;
};
#else
// without partial specialisation only c_regex_traits is wrapped:
template <>
struct compute_wrapper_base<c_regex_traits<char>, false>
{
   typedef default_wrapper<c_regex_traits<char> > type;
};
#ifndef BOOST_NO_WREGEX
template <>
struct compute_wrapper_base<c_regex_traits<wchar_t>, false>
{
   typedef default_wrapper<c_regex_traits<wchar_t> > type;
};
#endif
#endif

} // namespace re_detail

//
// regex_traits_wrapper:
// derives from the type chosen by compute_wrapper_base.  The copy
// constructor and assignment operator are private and have no definition
// here.
//
template <class BaseT>
struct regex_traits_wrapper 
   : public ::boost::re_detail::compute_wrapper_base<
               BaseT, 
               ::boost::re_detail::has_boost_extensions_tag<BaseT>::value
            >::type
{
   regex_traits_wrapper(){}
private:
   regex_traits_wrapper(const regex_traits_wrapper&);
   regex_traits_wrapper& operator=(const regex_traits_wrapper&);
};

} // namespace boost

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

#endif // include

diff --git a/ext/boost/regex/v4/regex_traits_defaults.hpp b/ext/boost/regex/v4/regex_traits_defaults.hpp new file mode 100644 index 0000000000..5b2c6bc338 --- /dev/null +++ b/ext/boost/regex/v4/regex_traits_defaults.hpp @@ -0,0 +1,371 @@
/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0.
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits_defaults.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares API's for access to regex_traits default properties. + */ + +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#include <boost/regex/v4/syntax_type.hpp> +#endif +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#include <boost/regex/v4/error_type.hpp> +#endif + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::strlen; +} +#endif + +namespace boost{ namespace re_detail{ + + +// +// helpers to suppress warnings: +// +template <class charT> +inline bool is_extended(charT c) +{ return c > 256; } +inline bool is_extended(char) +{ return false; } + + +BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); +BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); +BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); +BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); + +// is charT c a combining character? +BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); + +template <class charT> +inline bool is_combining(charT c) +{ + return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? 
false : is_combining_implementation(static_cast<unsigned short>(c))); +} +template <> +inline bool is_combining<char>(char) +{ + return false; +} +template <> +inline bool is_combining<signed char>(signed char) +{ + return false; +} +template <> +inline bool is_combining<unsigned char>(unsigned char) +{ + return false; +} +#ifndef __hpux // can't use WCHAR_MAX/MIN in pp-directives +#ifdef _MSC_VER +template<> +inline bool is_combining<wchar_t>(wchar_t c) +{ + return is_combining_implementation(static_cast<unsigned short>(c)); +} +#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) +template<> +inline bool is_combining<wchar_t>(wchar_t c) +{ + return is_combining_implementation(static_cast<unsigned short>(c)); +} +#else +template<> +inline bool is_combining<wchar_t>(wchar_t c) +{ + return (c >= (std::numeric_limits<uint_least16_t>::max)()) ? false : is_combining_implementation(static_cast<unsigned short>(c)); +} +#endif +#endif +#endif + +// +// is a charT c a line separator? 
//
// is_separator: true for '\n', '\r', '\f', and for characters whose value,
// cast to boost::uint16_t, is 0x2028, 0x2029 or 0x85.  The char
// specialisation tests only '\n', '\r' and '\f'.
//
template <class charT>
inline bool is_separator(charT c)
{
   return BOOST_REGEX_MAKE_BOOL(
      (c == static_cast<charT>('\n'))
      || (c == static_cast<charT>('\r'))
      || (c == static_cast<charT>('\f'))
      || (static_cast<boost::uint16_t>(c) == 0x2028u)
      || (static_cast<boost::uint16_t>(c) == 0x2029u)
      || (static_cast<boost::uint16_t>(c) == 0x85u));
}
template <>
inline bool is_separator<char>(char c)
{
   return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f'));
}

//
// get a default collating element:
//
BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);

//
// get the state_id of a character classification, the individual
// traits classes then transform that state_id into a bitmask:
//
// character_pointer_range: a half-open range [p1, p2) of characters,
// ordered lexicographically and compared for equality element by element.
//
template <class charT>
struct character_pointer_range
{
   const charT* p1;
   const charT* p2;

   bool operator < (const character_pointer_range& r)const
   {
      return std::lexicographical_compare(p1, p2, r.p1, r.p2);
   }
   bool operator == (const character_pointer_range& r)const
   {
      // Not only do we check that the ranges are of equal size before
      // calling std::equal, but there is no other algorithm available:
      // not even a non-standard MS one.  So forward to unchecked_equal
      // in the MS case.
      return ((p2 - p1) == (r.p2 - r.p1)) && re_detail::equal(p1, p2, r.p1);
   }
};
//
// get_default_class_id:
// looks up the name in [p1, p2) in the table of class names below and
// returns its index in that table, or -1 if the name is not present.
//
template <class charT>
int get_default_class_id(const charT* p1, const charT* p2)
{
   // all the names concatenated; ranges[] below indexes into this array,
   // and the single-letter names reuse characters of the longer names:
   static const charT data[73] = {
      'a', 'l', 'n', 'u', 'm',
      'a', 'l', 'p', 'h', 'a',
      'b', 'l', 'a', 'n', 'k',
      'c', 'n', 't', 'r', 'l',
      'd', 'i', 'g', 'i', 't',
      'g', 'r', 'a', 'p', 'h',
      'l', 'o', 'w', 'e', 'r',
      'p', 'r', 'i', 'n', 't',
      'p', 'u', 'n', 'c', 't',
      's', 'p', 'a', 'c', 'e',
      'u', 'n', 'i', 'c', 'o', 'd', 'e',
      'u', 'p', 'p', 'e', 'r',
      'v',
      'w', 'o', 'r', 'd',
      'x', 'd', 'i', 'g', 'i', 't',
   };

   // searched with std::lower_bound below, so the entries have to stay in
   // the order defined by character_pointer_range::operator< :
   static const character_pointer_range<charT> ranges[21] =
   {
      {data+0, data+5,}, // alnum
      {data+5, data+10,}, // alpha
      {data+10, data+15,}, // blank
      {data+15, data+20,}, // cntrl
      {data+20, data+21,}, // d
      {data+20, data+25,}, // digit
      {data+25, data+30,}, // graph
      {data+29, data+30,}, // h
      {data+30, data+31,}, // l
      {data+30, data+35,}, // lower
      {data+35, data+40,}, // print
      {data+40, data+45,}, // punct
      {data+45, data+46,}, // s
      {data+45, data+50,}, // space
      {data+57, data+58,}, // u
      {data+50, data+57,}, // unicode
      {data+57, data+62,}, // upper
      {data+62, data+63,}, // v
      {data+63, data+64,}, // w
      {data+63, data+67,}, // word
      {data+67, data+73,}, // xdigit
   };
   static const character_pointer_range<charT>* ranges_begin = ranges;
   static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));

   character_pointer_range<charT> t = { p1, p2, };
   const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
   // lower_bound only finds the insertion point: confirm an exact match.
   if((p != ranges_end) && (t == *p))
      return static_cast<int>(p - ranges);
   return -1;
}

//
// helper functions:
//
// global_length: number of characters before the terminating null.
//
template <class charT>
std::ptrdiff_t global_length(const charT* p)
{
   std::ptrdiff_t n = 0;
   while(*p)
   {
      ++p;
      ++n;
   }
   return n;
}
template<>
inline std::ptrdiff_t global_length<char>(const char* p)
{
   return (std::strlen)(p);
}
#ifndef BOOST_NO_WREGEX
template<>
inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
{
   return (std::wcslen)(p);
}
#endif
// global_lower / global_upper: the primary templates return c unchanged.
template <class charT>
inline charT BOOST_REGEX_CALL global_lower(charT c)
{
   return c;
}
template <class charT>
inline charT BOOST_REGEX_CALL global_upper(charT c)
{
   return c;
}

BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c);
BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c);
#ifndef BOOST_NO_WREGEX
BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c);
BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c);
#endif
#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c);
BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c);
#endif
//
// This sucks: declare template specialisations of global_lower/global_upper
// that just forward to the non-template implementation functions.  We do
// this because there is one compiler (Compaq Tru64 C++) that doesn't seem
// to differentiate between templates and non-template overloads....
+// what's more, the primary template, plus all overloads have to be +// defined in the same translation unit (if one is inline they all must be) +// otherwise the "local template instantiation" compiler option can pick +// the wrong instantiation when linking: +// +template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); } +template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); } +#ifndef BOOST_NO_WREGEX +template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); } +#endif +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); } +template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); } +#endif + +template <class charT> +int global_value(charT c) +{ + static const charT zero = '0'; + static const charT nine = '9'; + static const charT a = 'a'; + static const charT f = 'f'; + static const charT A = 'A'; + static const charT F = 'F'; + + if(c > f) return -1; + if(c >= a) return 10 + (c - a); + if(c > F) return -1; + if(c >= A) return 10 + (c - A); + if(c > nine) return -1; + if(c >= zero) return c - zero; + return -1; +} +template <class charT, class traits> +int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) +{ + (void)t; // warning suppression + int next_value = t.value(*p1, radix); + if((p1 == p2) || (next_value < 0) || (next_value >= radix)) + return -1; + int result = 0; + while(p1 != p2) + { + next_value = t.value(*p1, radix); + if((next_value < 0) || (next_value >= radix)) + break; + result *= radix; + result += next_value; + ++p1; + } + return result; +} + +template <class charT> +inline const charT* get_escape_R_string() +{ 
+#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable:4309) +#endif + static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}', + '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; + static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; + + charT c = static_cast<charT>(0x2029u); + bool b = (static_cast<unsigned>(c) == 0x2029u); + + return (b ? e1 : e2); +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif +} + +template <> +inline const char* get_escape_R_string<char>() +{ +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable:4309) +#endif + static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; + return e2; +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif +} + +} // re_detail +} // boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/ext/boost/regex/v4/regex_workaround.hpp b/ext/boost/regex/v4/regex_workaround.hpp new file mode 100644 index 0000000000..06527f1a1f --- /dev/null +++ b/ext/boost/regex/v4/regex_workaround.hpp @@ -0,0 +1,202 @@ +/* + * + * Copyright (c) 1998-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_workarounds.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares Misc workarounds. 
+  */
+
+#ifndef BOOST_REGEX_WORKAROUND_HPP
+#define BOOST_REGEX_WORKAROUND_HPP
+
+
+#include <new>
+#include <cstring>
+#include <cstdlib>
+#include <cstddef>
+#include <cassert>
+#include <cstdio>
+#include <climits>
+#include <string>
+#include <stdexcept>
+#include <iterator>
+#include <algorithm>
+#include <iosfwd>
+#include <vector>
+#include <map>
+#include <boost/limits.hpp>
+#include <boost/assert.hpp>
+#include <boost/cstdint.hpp>
+#include <boost/throw_exception.hpp>
+#include <boost/scoped_ptr.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/mpl/bool_fwd.hpp>
+#ifndef BOOST_NO_STD_LOCALE
+#  include <locale>
+#endif
+
+// Make the C string functions used by the library visible in namespace std
+// when the compiler only provides them in the global namespace:
+#if defined(BOOST_NO_STDC_NAMESPACE)
+namespace std{
+   using ::sprintf; using ::strcpy; using ::strcat; using ::strlen;
+}
+#endif
+
+// re_detail::distance is std::distance, or "y - x" when
+// BOOST_NO_STD_DISTANCE is defined:
+namespace boost{ namespace re_detail{
+#ifdef BOOST_NO_STD_DISTANCE
+template <class T>
+std::ptrdiff_t distance(const T& x, const T& y)
+{ return y - x; }
+#else
+using std::distance;
+#endif
+}}
+
+
+// BOOST_REGEX_MAKE_BOOL(x) converts x to bool, going through an explicit
+// "? true : false" when BOOST_REGEX_NO_BOOL is defined:
+#ifdef BOOST_REGEX_NO_BOOL
+#  define BOOST_REGEX_MAKE_BOOL(x) static_cast<bool>((x) ? true : false)
+#else
+#  define BOOST_REGEX_MAKE_BOOL(x) static_cast<bool>(x)
+#endif
+
+/*****************************************************************************
+ *
+ * Fix broken namespace support:
+ *
+ ****************************************************************************/
+
+#if defined(BOOST_NO_STDC_NAMESPACE) && defined(__cplusplus)
+
+namespace std{
+   using ::ptrdiff_t;
+   using ::size_t;
+   using ::abs;
+   using ::memset;
+   using ::memcpy;
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * helper functions pointer_construct/pointer_destroy:
+ *
+ ****************************************************************************/
+
+#ifdef __cplusplus
+namespace boost{ namespace re_detail{
+
+#ifdef BOOST_MSVC
+#pragma warning (push)
+#pragma warning (disable : 4100)
+#endif
+
+// Runs the destructor of *p without releasing its storage; the "(void)p"
+// is there in addition to the disabled MSVC warning 4100 above.
+template <class T>
+inline void pointer_destroy(T* p)
+{ p->~T(); (void)p; }
+
+#ifdef BOOST_MSVC
+#pragma warning (pop)
+#endif
+
+// Copy-constructs a T from t in the storage that p points to (placement new):
+template <class T>
+inline void pointer_construct(T* p, const T& t)
+{ new (p) T(t); }
+
+}} // namespaces
+#endif
+
+/*****************************************************************************
+ *
+ * helper function copy:
+ *
+ ****************************************************************************/
+
+#ifdef __cplusplus
+namespace boost{ namespace re_detail{
+#if BOOST_WORKAROUND(BOOST_MSVC,>=1400) && BOOST_WORKAROUND(BOOST_MSVC, <1600) && defined(_CPPLIB_VER) && defined(BOOST_DINKUMWARE_STDLIB) && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
+   //
+   // MSVC 8 will either emit warnings or else refuse to compile
+   // code that makes perfectly legitimate use of std::copy, when
+   // the OutputIterator type is a user-defined class (apparently all user
+   // defined iterators are "unsafe"). This code works around that:
+   //
+   template<class InputIterator, class OutputIterator>
+   inline OutputIterator copy(
+      InputIterator first,
+      InputIterator last,
+      OutputIterator dest
+   )
+   {
+      return stdext::unchecked_copy(first, last, dest);
+   }
+   template<class InputIterator1, class InputIterator2>
+   inline bool equal(
+      InputIterator1 first,
+      InputIterator1 last,
+      InputIterator2 with
+   )
+   {
+      return stdext::unchecked_equal(first, last, with);
+   }
+
+#else
+   // everywhere else re_detail::copy/equal are the standard algorithms:
+   using std::copy;
+   using std::equal;
+#endif
+#if BOOST_WORKAROUND(BOOST_MSVC,>=1400) && defined(__STDC_WANT_SECURE_LIB__) && __STDC_WANT_SECURE_LIB__
+
+   // use safe versions of strcpy etc:
+   using ::strcpy_s;
+   using ::strcat_s;
+#else
+   //
+   // Fallbacks with the same argument order as the secure CRT functions.
+   // Both return 1 without touching strDestination when the result
+   // (including the terminating null) would not fit in sizeInBytes,
+   // and 0 after a successful copy/append.
+   //
+   inline std::size_t strcpy_s(
+      char *strDestination,
+      std::size_t sizeInBytes,
+      const char *strSource
+   )
+   {
+      if(std::strlen(strSource)+1 > sizeInBytes)
+         return 1;
+      std::strcpy(strDestination, strSource);
+      return 0;
+   }
+   inline std::size_t strcat_s(
+      char *strDestination,
+      std::size_t sizeInBytes,
+      const char *strSource
+   )
+   {
+      if(std::strlen(strSource) + std::strlen(strDestination) + 1 > sizeInBytes)
+         return 1;
+      std::strcat(strDestination, strSource);
+      return 0;
+   }
+
+#endif
+
+   // Throws std::overflow_error (via boost::throw_exception) when i is
+   // non-zero, i.e. when one of the string functions above reported failure:
+   inline void overflow_error_if_not_zero(std::size_t i)
+   {
+      if(i)
+      {
+         std::overflow_error e("String buffer too small");
+         boost::throw_exception(e);
+      }
+   }
+
+}} // namespaces
+
+#endif // __cplusplus
+
+#endif // include guard
+
diff --git a/ext/boost/regex/v4/states.hpp b/ext/boost/regex/v4/states.hpp
new file mode 100644
index 0000000000..efdebbe59f
--- /dev/null
+++ b/ext/boost/regex/v4/states.hpp
@@ -0,0 +1,293 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  * LOCATION: see http://www.boost.org for most recent version.
+  * FILE states.cpp
+  * VERSION see <boost/version.hpp>
+  * DESCRIPTION: Declares internal state machine structures.
+  */
+
+#ifndef BOOST_REGEX_V4_STATES_HPP
+#define BOOST_REGEX_V4_STATES_HPP
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_PREFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+namespace boost{
+namespace re_detail{
+
+/*** mask_type *******************************************************
+Whenever we have a choice of two alternatives, we use an array of bytes
+to indicate which of the two alternatives it is possible to take for any
+given input character. If mask_take is set, then we can take the next
+state, and if mask_skip is set then we can take the alternative.
+mask_any and mask_all both equal (mask_skip | mask_take); mask_init is a
+separate bit that is not part of either.
+***********************************************************************/
+enum mask_type
+{
+   mask_take = 1,
+   mask_skip = 2,
+   mask_init = 4,
+   mask_any = mask_skip | mask_take,
+   mask_all = mask_any
+};
+
+/*** helpers **********************************************************
+These helpers let us use function overload resolution to detect whether
+we have narrow or wide character strings:
+is_byte<charT>::width_type is _narrow_type for char, unsigned char and
+signed char, and _wide_type for every other charT.
+***********************************************************************/
+struct _narrow_type{};
+struct _wide_type{};
+template <class charT> struct is_byte;
+template<> struct is_byte<char> { typedef _narrow_type width_type; };
+template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; };
+template<> struct is_byte<signed char> { typedef _narrow_type width_type; };
+template <class charT> struct is_byte { typedef _wide_type width_type; };
+
+/*** enum syntax_element_type ******************************************
+Every record in the state machine falls into one of the following types:
+(each enumerator is defined as its predecessor + 1, so the values are
+consecutive starting from syntax_element_startmark == 0)
+***********************************************************************/
+enum syntax_element_type
+{
+   // start of a marked sub-expression, or perl-style (?...) extension
+   syntax_element_startmark = 0,
+   // end of a marked sub-expression, or perl-style (?...) extension
+   syntax_element_endmark = syntax_element_startmark + 1,
+   // any sequence of literal characters
+   syntax_element_literal = syntax_element_endmark + 1,
+   // start of line assertion: ^
+   syntax_element_start_line = syntax_element_literal + 1,
+   // end of line assertion $
+   syntax_element_end_line = syntax_element_start_line + 1,
+   // match any character: .
+   syntax_element_wild = syntax_element_end_line + 1,
+   // end of expression: we have a match when we get here
+   syntax_element_match = syntax_element_wild + 1,
+   // perl style word boundary: \b
+   syntax_element_word_boundary = syntax_element_match + 1,
+   // perl style within word boundary: \B
+   syntax_element_within_word = syntax_element_word_boundary + 1,
+   // start of word assertion: \<
+   syntax_element_word_start = syntax_element_within_word + 1,
+   // end of word assertion: \>
+   syntax_element_word_end = syntax_element_word_start + 1,
+   // start of buffer assertion: \`
+   syntax_element_buffer_start = syntax_element_word_end + 1,
+   // end of buffer assertion: \'
+   syntax_element_buffer_end = syntax_element_buffer_start + 1,
+   // backreference to previously matched sub-expression
+   syntax_element_backref = syntax_element_buffer_end + 1,
+   // either a wide character set [..] or one with multicharacter collating elements:
+   syntax_element_long_set = syntax_element_backref + 1,
+   // narrow character set: [...]
+   syntax_element_set = syntax_element_long_set + 1,
+   // jump to a new state in the machine:
+   syntax_element_jump = syntax_element_set + 1,
+   // choose between two production states:
+   syntax_element_alt = syntax_element_jump + 1,
+   // a repeat
+   syntax_element_rep = syntax_element_alt + 1,
+   // match a combining character sequence
+   syntax_element_combining = syntax_element_rep + 1,
+   // perl style soft buffer end: \z
+   syntax_element_soft_buffer_end = syntax_element_combining + 1,
+   // perl style continuation: \G
+   syntax_element_restart_continue = syntax_element_soft_buffer_end + 1,
+   // single character repeats:
+   syntax_element_dot_rep = syntax_element_restart_continue + 1,
+   syntax_element_char_rep = syntax_element_dot_rep + 1,
+   syntax_element_short_set_rep = syntax_element_char_rep + 1,
+   syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
+   // a backstep for lookbehind repeats:
+   syntax_element_backstep = syntax_element_long_set_rep + 1,
+   // an assertion that a mark was matched:
+   syntax_element_assert_backref = syntax_element_backstep + 1,
+   // NOTE(review): presumably a switch of case sensitivity (see re_case) - confirm
+   syntax_element_toggle_case = syntax_element_assert_backref + 1,
+   // a recursive expression:
+   syntax_element_recurse = syntax_element_toggle_case + 1
+};
+
+#ifdef BOOST_REGEX_DEBUG
+// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
+std::ostream& operator<<(std::ostream&, syntax_element_type);
+#endif
+
+struct re_syntax_base;
+
+/*** union offset_type ************************************************
+Points to another state in the machine. During machine construction
+we use integral offsets, but these are converted to pointers before
+execution of the machine.
+***********************************************************************/
+union offset_type
+{
+   re_syntax_base* p;   // pointer form
+   std::ptrdiff_t i;    // integral offset form
+};
+
+/*** struct re_syntax_base ********************************************
+Base class for all states in the machine.
+***********************************************************************/
+struct re_syntax_base
+{
+   syntax_element_type type; // what kind of state this is
+   offset_type next; // next state in the machine
+};
+
+/*** struct re_brace **************************************************
+A marked parenthesis.
+***********************************************************************/
+struct re_brace : public re_syntax_base
+{
+   // The index to match, can be zero (don't mark the sub-expression)
+   // or negative (for perl style (?...) extensions):
+   int index;
+   bool icase;
+};
+
+/*** struct re_dot **************************************************
+Match anything.
+Note that the unnamed enumeration below deliberately or not gives
+force_newline and test_not_newline the same value (2).
+***********************************************************************/
+enum
+{
+   dont_care = 1,
+   force_not_newline = 0,
+   force_newline = 2,
+
+   test_not_newline = 2,
+   test_newline = 3
+};
+struct re_dot : public re_syntax_base
+{
+   unsigned char mask;
+};
+
+/*** struct re_literal ************************************************
+A string of literals, following this structure will be an
+array of characters: charT[length]
+***********************************************************************/
+struct re_literal : public re_syntax_base
+{
+   unsigned int length;
+};
+
+/*** struct re_case ************************************************
+Indicates whether we are moving to a case insensitive block or not
+***********************************************************************/
+struct re_case : public re_syntax_base
+{
+   bool icase;
+};
+
+/*** struct re_set_long ***********************************************
+A wide character set of characters, following this structure will be
+an array of type charT:
+First csingles null-terminated strings
+Then 2 * cranges NULL terminated strings
+Then cequivalents NULL terminated strings
+***********************************************************************/
+template <class mask_type>
+struct re_set_long : public re_syntax_base
+{
+   unsigned int csingles, cranges, cequivalents;
+   mask_type cclasses;
+   mask_type cnclasses;
+   bool isnot;
+   bool singleton;
+};
+
+/*** struct re_set ****************************************************
+A set of narrow-characters, matches any of _map which is non-zero
+(_map has one entry for each of the 1 << CHAR_BIT possible byte values)
+***********************************************************************/
+struct re_set : public re_syntax_base
+{
+   unsigned char _map[1 << CHAR_BIT];
+};
+
+/*** struct re_jump ***************************************************
+Jump to a new location in the machine (not next).
+***********************************************************************/
+struct re_jump : public re_syntax_base
+{
+   offset_type alt; // location to jump to
+};
+
+/*** struct re_alt ***************************************************
+Jump to a new location in the machine (possibly next).
+***********************************************************************/
+struct re_alt : public re_jump
+{
+   unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump
+   unsigned int can_be_null; // true if we match a NULL string
+};
+
+/*** struct re_repeat *************************************************
+Repeat a section of the machine
+***********************************************************************/
+struct re_repeat : public re_alt
+{
+   std::size_t min, max; // min and max allowable repeats
+   int state_id; // Unique identifier for this repeat
+   bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches)
+   bool greedy; // True if this is a greedy repeat
+};
+
+/*** enum re_jump_size_type *******************************************
+Provides compiled size of re_jump structure (allowing for trailing alignment).
+We provide this so we know how many bytes to insert when constructing the machine
+(The value of padding_mask is defined in regex_raw_buffer.hpp).
+***********************************************************************/
+// Each value is the structure's sizeof with padding_mask added and the
+// padding_mask bits cleared again; this rounds up to a multiple of
+// (padding_mask + 1) provided padding_mask has the form 2^n - 1
+// (padding_mask is defined outside this file - TODO confirm).
+enum re_jump_size_type
+{
+   re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
+   re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
+   re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
+};
+
+/*** proc re_is_set_member *********************************************
+Forward declaration: we'll need this one later...
+***********************************************************************/
+
+template<class charT, class traits>
+struct regex_data;
+
+template <class iterator, class charT, class traits_type, class char_classT>
+iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
+                          iterator last,
+                          const re_set_long<char_classT>* set_,
+                          const regex_data<charT, traits_type>& e, bool icase);
+
+} // namespace re_detail
+
+} // namespace boost
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+#  include BOOST_ABI_SUFFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+
diff --git a/ext/boost/regex/v4/sub_match.hpp b/ext/boost/regex/v4/sub_match.hpp
new file mode 100644
index 0000000000..1c79e39a9a
--- /dev/null
+++ b/ext/boost/regex/v4/sub_match.hpp
@@ -0,0 +1,509 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  * LOCATION: see http://www.boost.org for most recent version.
+  * FILE sub_match.cpp
+  * VERSION see <boost/version.hpp>
+  * DESCRIPTION: Declares template class sub_match.
+ */ + +#ifndef BOOST_REGEX_V4_SUB_MATCH_HPP +#define BOOST_REGEX_V4_SUB_MATCH_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace boost{ + +template <class BidiIterator> +struct sub_match : public std::pair<BidiIterator, BidiIterator> +{ + typedef typename re_detail::regex_iterator_traits<BidiIterator>::value_type value_type; +#if defined(BOOST_NO_STD_ITERATOR_TRAITS) || defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef std::ptrdiff_t difference_type; +#else + typedef typename re_detail::regex_iterator_traits<BidiIterator>::difference_type difference_type; +#endif + typedef BidiIterator iterator_type; + typedef BidiIterator iterator; + typedef BidiIterator const_iterator; + + bool matched; + + sub_match() : std::pair<BidiIterator, BidiIterator>(), matched(false) {} + sub_match(BidiIterator i) : std::pair<BidiIterator, BidiIterator>(i, i), matched(false) {} +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1310)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)\ + && !BOOST_WORKAROUND(__DECCXX_VER, BOOST_TESTED_AT(60590042)) + template <class T, class A> + operator std::basic_string<value_type, T, A> ()const + { + return std::basic_string<value_type, T, A>(this->first, this->second); + } +#else + operator std::basic_string<value_type> ()const + { + return str(); + } +#endif + difference_type BOOST_REGEX_CALL length()const + { + difference_type n = ::boost::re_detail::distance((BidiIterator)this->first, (BidiIterator)this->second); + return n; + } + std::basic_string<value_type> str()const + { + std::basic_string<value_type> result; + std::size_t len = ::boost::re_detail::distance((BidiIterator)this->first, (BidiIterator)this->second); + result.reserve(len); + BidiIterator i = this->first; + while(i != this->second) + { + result.append(1, *i); + ++i; + 
} + return result; + } + int compare(const sub_match& s)const + { + if(matched != s.matched) + return static_cast<int>(matched) - static_cast<int>(s.matched); + return str().compare(s.str()); + } + int compare(const std::basic_string<value_type>& s)const + { + return str().compare(s); + } + int compare(const value_type* p)const + { + return str().compare(p); + } + + bool operator==(const sub_match& that)const + { return compare(that) == 0; } + bool BOOST_REGEX_CALL operator !=(const sub_match& that)const + { return compare(that) != 0; } + bool operator<(const sub_match& that)const + { return compare(that) < 0; } + bool operator>(const sub_match& that)const + { return compare(that) > 0; } + bool operator<=(const sub_match& that)const + { return compare(that) <= 0; } + bool operator>=(const sub_match& that)const + { return compare(that) >= 0; } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef std::vector<sub_match<BidiIterator> > capture_sequence_type; + + const capture_sequence_type& captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + // + // Private implementation API: DO NOT USE! 
+ // + capture_sequence_type& get_captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + +private: + mutable boost::scoped_ptr<capture_sequence_type> m_captures; +public: + +#endif + sub_match(const sub_match& that, bool +#ifdef BOOST_REGEX_MATCH_EXTRA + deep_copy +#endif + = true + ) + : std::pair<BidiIterator, BidiIterator>(that), + matched(that.matched) + { +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + if(deep_copy) + m_captures.reset(new capture_sequence_type(*(that.m_captures))); +#endif + } + sub_match& operator=(const sub_match& that) + { + this->first = that.first; + this->second = that.second; + matched = that.matched; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + get_captures() = *(that.m_captures); +#endif + return *this; + } + + +#ifdef BOOST_OLD_REGEX_H + // + // the following are deprecated, do not use!! + // + operator int()const; + operator unsigned int()const; + operator short()const + { + return (short)(int)(*this); + } + operator unsigned short()const + { + return (unsigned short)(unsigned int)(*this); + } +#endif +}; + +typedef sub_match<const char*> csub_match; +typedef sub_match<std::string::const_iterator> ssub_match; +#ifndef BOOST_NO_WREGEX +typedef sub_match<const wchar_t*> wcsub_match; +typedef sub_match<std::wstring::const_iterator> wssub_match; +#endif + +// comparison to std::basic_string<> part 1: +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator == (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ return s.compare(m.str()) == 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator != (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ 
return s.compare(m.str()) != 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator < (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ return s.compare(m.str()) < 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator <= (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ return s.compare(m.str()) <= 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator >= (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ return s.compare(m.str()) >= 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator > (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ return s.compare(m.str()) > 0; } +// comparison to std::basic_string<> part 2: +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator == (const sub_match<RandomAccessIterator>& m, + const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ return m.str().compare(s) == 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator != (const sub_match<RandomAccessIterator>& m, + const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ return m.str().compare(s) != 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator < (const 
sub_match<RandomAccessIterator>& m, + const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ return m.str().compare(s) < 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator > (const sub_match<RandomAccessIterator>& m, + const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ return m.str().compare(s) > 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator <= (const sub_match<RandomAccessIterator>& m, + const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ return m.str().compare(s) <= 0; } +template <class RandomAccessIterator, class traits, class Allocator> +inline bool operator >= (const sub_match<RandomAccessIterator>& m, + const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ return m.str().compare(s) >= 0; } +// comparison to const charT* part 1: +template <class RandomAccessIterator> +inline bool operator == (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s) +{ return m.str().compare(s) == 0; } +template <class RandomAccessIterator> +inline bool operator != (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s) +{ return m.str().compare(s) != 0; } +template <class RandomAccessIterator> +inline bool operator > (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s) +{ return m.str().compare(s) > 0; } +template <class RandomAccessIterator> +inline bool operator < (const sub_match<RandomAccessIterator>& m, + typename 
re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s) +{ return m.str().compare(s) < 0; } +template <class RandomAccessIterator> +inline bool operator >= (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s) +{ return m.str().compare(s) >= 0; } +template <class RandomAccessIterator> +inline bool operator <= (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s) +{ return m.str().compare(s) <= 0; } +// comparison to const charT* part 2: +template <class RandomAccessIterator> +inline bool operator == (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(s) == 0; } +template <class RandomAccessIterator> +inline bool operator != (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(s) != 0; } +template <class RandomAccessIterator> +inline bool operator < (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(s) > 0; } +template <class RandomAccessIterator> +inline bool operator > (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(s) < 0; } +template <class RandomAccessIterator> +inline bool operator <= (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(s) >= 0; } +template <class RandomAccessIterator> +inline bool operator >= (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(s) <= 
0; } + +// comparison to const charT& part 1: +template <class RandomAccessIterator> +inline bool operator == (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template <class RandomAccessIterator> +inline bool operator != (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template <class RandomAccessIterator> +inline bool operator > (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template <class RandomAccessIterator> +inline bool operator < (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template <class RandomAccessIterator> +inline bool operator >= (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template <class RandomAccessIterator> +inline bool operator <= (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } +// comparison to const charT* part 2: +template <class RandomAccessIterator> +inline bool operator == (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template <class RandomAccessIterator> +inline bool operator != (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const 
sub_match<RandomAccessIterator>& m) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template <class RandomAccessIterator> +inline bool operator < (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template <class RandomAccessIterator> +inline bool operator > (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template <class RandomAccessIterator> +inline bool operator <= (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template <class RandomAccessIterator> +inline bool operator >= (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const sub_match<RandomAccessIterator>& m) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } + +// addition operators: +template <class RandomAccessIterator, class traits, class Allocator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator> +operator + (const std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s, + const sub_match<RandomAccessIterator>& m) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template <class RandomAccessIterator, class traits, class Allocator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator> +operator + (const sub_match<RandomAccessIterator>& m, + const 
std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator>& s) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +#if !(defined(__GNUC__) && defined(BOOST_NO_STD_LOCALE)) +template <class RandomAccessIterator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> result; + result.reserve(std::char_traits<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type>::length(s) + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template <class RandomAccessIterator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const * s) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> result; + result.reserve(std::char_traits<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type>::length(s) + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +#else +// workaround versions: +template <class RandomAccessIterator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const* s, + const sub_match<RandomAccessIterator>& m) +{ + return s + m.str(); +} +template <class RandomAccessIterator> +inline 
std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const * s) +{ + return m.str() + s; +} +#endif +template <class RandomAccessIterator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s, + const sub_match<RandomAccessIterator>& m) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> result; + result.reserve(m.length() + 2); + return result.append(1, s).append(m.first, m.second); +} +template <class RandomAccessIterator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (const sub_match<RandomAccessIterator>& m, + typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type const& s) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> result; + result.reserve(m.length() + 2); + return result.append(m.first, m.second).append(1, s); +} +template <class RandomAccessIterator> +inline std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> +operator + (const sub_match<RandomAccessIterator>& m1, + const sub_match<RandomAccessIterator>& m2) +{ + std::basic_string<typename re_detail::regex_iterator_traits<RandomAccessIterator>::value_type> result; + result.reserve(m1.length() + m2.length() + 1); + return result.append(m1.first, m1.second).append(m2.first, m2.second); +} +#ifndef BOOST_NO_STD_LOCALE +template <class charT, class traits, class RandomAccessIterator> +std::basic_ostream<charT, traits>& + operator << (std::basic_ostream<charT, traits>& os, + const sub_match<RandomAccessIterator>& s) +{ + return (os << s.str()); +} +#else 
+template <class RandomAccessIterator> +std::ostream& operator << (std::ostream& os, + const sub_match<RandomAccessIterator>& s) +{ + return (os << s.str()); +} +#endif + +#ifdef BOOST_OLD_REGEX_H +namespace re_detail{ +template <class BidiIterator, class charT> +int do_toi(BidiIterator i, BidiIterator j, char c, int radix) +{ + std::string s(i, j); + char* p; + int result = std::strtol(s.c_str(), &p, radix); + if(*p)raise_regex_exception("Bad sub-expression"); + return result; +} + +// +// helper: +template <class I, class charT> +int do_toi(I& i, I j, charT c) +{ + int result = 0; + while((i != j) && (isdigit(*i))) + { + result = result*10 + (*i - '0'); + ++i; + } + return result; +} +} + + +template <class BidiIterator> +sub_match<BidiIterator>::operator int()const +{ + BidiIterator i = first; + BidiIterator j = second; + if(i == j)raise_regex_exception("Bad sub-expression"); + int neg = 1; + if((i != j) && (*i == '-')) + { + neg = -1; + ++i; + } + neg *= re_detail::do_toi(i, j, *first); // *first only selects charT; i may already equal j after a lone '-', so it must not be dereferenced + if(i != j)raise_regex_exception("Bad sub-expression"); + return neg; +} +template <class BidiIterator> +sub_match<BidiIterator>::operator unsigned int()const +{ + BidiIterator i = first; + BidiIterator j = second; + if(i == j) + raise_regex_exception("Bad sub-expression"); + return re_detail::do_toi(i, j, *first); +} +#endif + +} // namespace boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/ext/boost/regex/v4/syntax_type.hpp b/ext/boost/regex/v4/syntax_type.hpp new file mode 100644 index 0000000000..3efdf0b0f9 --- /dev/null +++ b/ext/boost/regex/v4/syntax_type.hpp @@ -0,0 +1,105 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE syntax_type.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression syntax type enumerator. + */ + +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#define BOOST_REGEX_SYNTAX_TYPE_HPP + +namespace boost{ +namespace regex_constants{ + +typedef unsigned char syntax_type; + +// +// values chosen are binary compatible with previous version: +// +static const syntax_type syntax_char = 0; +static const syntax_type syntax_open_mark = 1; +static const syntax_type syntax_close_mark = 2; +static const syntax_type syntax_dollar = 3; +static const syntax_type syntax_caret = 4; +static const syntax_type syntax_dot = 5; +static const syntax_type syntax_star = 6; +static const syntax_type syntax_plus = 7; +static const syntax_type syntax_question = 8; +static const syntax_type syntax_open_set = 9; +static const syntax_type syntax_close_set = 10; +static const syntax_type syntax_or = 11; +static const syntax_type syntax_escape = 12; +static const syntax_type syntax_dash = 14; +static const syntax_type syntax_open_brace = 15; +static const syntax_type syntax_close_brace = 16; +static const syntax_type syntax_digit = 17; +static const syntax_type syntax_comma = 27; +static const syntax_type syntax_equal = 37; +static const syntax_type syntax_colon = 36; +static const syntax_type syntax_not = 53; + +// extensions: + +static const syntax_type syntax_hash = 13; +static const syntax_type syntax_newline = 26; + +// escapes: + +typedef syntax_type escape_syntax_type; + +static const escape_syntax_type escape_type_word_assert = 18; +static const escape_syntax_type escape_type_not_word_assert = 19; +static const escape_syntax_type escape_type_control_f = 29; +static const escape_syntax_type escape_type_control_n = 30; +static const escape_syntax_type escape_type_control_r = 31; +static const 
escape_syntax_type escape_type_control_t = 32; +static const escape_syntax_type escape_type_control_v = 33; +static const escape_syntax_type escape_type_ascii_control = 35; +static const escape_syntax_type escape_type_hex = 34; +static const escape_syntax_type escape_type_unicode = 0; // not used +static const escape_syntax_type escape_type_identity = 0; // not used +static const escape_syntax_type escape_type_backref = syntax_digit; +static const escape_syntax_type escape_type_decimal = syntax_digit; // not used +static const escape_syntax_type escape_type_class = 22; +static const escape_syntax_type escape_type_not_class = 23; + +// extensions: + +static const escape_syntax_type escape_type_left_word = 20; +static const escape_syntax_type escape_type_right_word = 21; +static const escape_syntax_type escape_type_start_buffer = 24; // for \` +static const escape_syntax_type escape_type_end_buffer = 25; // for \' +static const escape_syntax_type escape_type_control_a = 28; // for \a +static const escape_syntax_type escape_type_e = 38; // for \e +static const escape_syntax_type escape_type_E = 47; // for \Q\E +static const escape_syntax_type escape_type_Q = 48; // for \Q\E +static const escape_syntax_type escape_type_X = 49; // for \X +static const escape_syntax_type escape_type_C = 50; // for \C +static const escape_syntax_type escape_type_Z = 51; // for \Z +static const escape_syntax_type escape_type_G = 52; // for \G + +static const escape_syntax_type escape_type_property = 54; // for \p +static const escape_syntax_type escape_type_not_property = 55; // for \P +static const escape_syntax_type escape_type_named_char = 56; // for \N +static const escape_syntax_type escape_type_extended_backref = 57; // for \g +static const escape_syntax_type escape_type_reset_start_mark = 58; // for \K +static const escape_syntax_type escape_type_line_ending = 59; // for \R + +static const escape_syntax_type syntax_max = 60; + +} +} + + +#endif diff --git 
a/ext/boost/regex/v4/u32regex_iterator.hpp b/ext/boost/regex/v4/u32regex_iterator.hpp new file mode 100644 index 0000000000..7e893e6951 --- /dev/null +++ b/ext/boost/regex/v4/u32regex_iterator.hpp @@ -0,0 +1,193 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_iterator.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides u32regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V4_U32REGEX_ITERATOR_HPP +#define BOOST_REGEX_V4_U32REGEX_ITERATOR_HPP + +namespace boost{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +template <class BidirectionalIterator> +class u32regex_iterator_implementation +{ + typedef u32regex regex_type; + + match_results<BidirectionalIterator> what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + u32regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + bool init(BidirectionalIterator first) + { + base = first; + return u32regex_search(first, end, what, re, flags, base); + } + bool compare(const u32regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results<BidirectionalIterator>& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = 
what[0].second; + match_flag_type f(flags); + if(!what.length()) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = u32regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + u32regex_iterator_implementation& operator=(const u32regex_iterator_implementation&); +}; + +template <class BidirectionalIterator> +class u32regex_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + match_results<BidirectionalIterator>, + typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type, + const match_results<BidirectionalIterator>*, + const match_results<BidirectionalIterator>& > +#endif +{ +private: + typedef u32regex_iterator_implementation<BidirectionalIterator> impl; + typedef shared_ptr<impl> pimpl; +public: + typedef u32regex regex_type; + typedef match_results<BidirectionalIterator> value_type; + typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_iterator(){} + u32regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + u32regex_iterator(const u32regex_iterator& that) + : pdata(that.pdata) {} + u32regex_iterator& operator=(const u32regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_iterator& that)const + { return !(*this == that); } + const value_type& 
operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_iterator operator++(int) + { + u32regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_iterator<const char*> utf8regex_iterator; +typedef u32regex_iterator<const UChar*> utf16regex_iterator; +typedef u32regex_iterator<const UChar32*> utf32regex_iterator; + +inline u32regex_iterator<const char*> make_u32regex_iterator(const char* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator<const char*>(p, p+std::strlen(p), e, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_iterator<const wchar_t*> make_u32regex_iterator(const wchar_t* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator<const wchar_t*>(p, p+std::wcslen(p), e, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_iterator<const UChar*> make_u32regex_iterator(const UChar* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator<const UChar*>(p, p+u_strlen(p), e, m); +} +#endif +template <class charT, class Traits, class Alloc> +inline u32regex_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> make_u32regex_iterator(const std::basic_string<charT, Traits, Alloc>& p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; + return u32regex_iterator<iter_type>(p.begin(), p.end(), e, m); +} +inline 
u32regex_iterator<const UChar*> make_u32regex_iterator(const UnicodeString& s, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, m); +} + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V4_U32REGEX_ITERATOR_HPP + diff --git a/ext/boost/regex/v4/u32regex_token_iterator.hpp b/ext/boost/regex/v4/u32regex_token_iterator.hpp new file mode 100644 index 0000000000..2726d486fd --- /dev/null +++ b/ext/boost/regex/v4/u32regex_token_iterator.hpp @@ -0,0 +1,377 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_token_iterator.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Provides u32regex_token_iterator implementation. 
+ */ + +#ifndef BOOST_REGEX_V4_U32REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V4_U32REGEX_TOKEN_ITERATOR_HPP + +#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) +// +// Borland C++ Builder 6, and Visual C++ 6, +// can't cope with the array template constructor +// so we have a template member that will accept any type as +// argument, and then assert that is really is an array: +// +#include <boost/static_assert.hpp> +#include <boost/type_traits/is_array.hpp> +#endif + +namespace boost{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) +# pragma warning(push) +# pragma warning(disable:4700) +#endif + +template <class BidirectionalIterator> +class u32regex_token_iterator_implementation +{ + typedef u32regex regex_type; + typedef sub_match<BidirectionalIterator> value_type; + + match_results<BidirectionalIterator> what; // current match + BidirectionalIterator end; // end of search area + BidirectionalIterator base; // start of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector<int> subs; // the sub-expressions to enumerate + +public: + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f){ subs.push_back(sub); } + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector<int>& v, match_flag_type f) + : end(last), re(*p), flags(f), subs(v){} +#if BOOST_WORKAROUND(BOOST_MSVC, < 1300) + // can't reliably get this to work.... 
+#elif (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \ + || BOOST_WORKAROUND(__HP_aCC, < 60700) + template <class T> + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const T& submatches, match_flag_type f) + : end(last), re(*p), flags(f) + { + // assert that T really is an array: + BOOST_STATIC_ASSERT(::boost::is_array<T>::value); + const std::size_t array_size = sizeof(T) / sizeof(submatches[0]); + for(std::size_t i = 0; i < array_size; ++i) + { + subs.push_back(submatches[i]); + } + } +#else + template <std::size_t CN> + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } +#endif + + bool init(BidirectionalIterator first) + { + base = first; + N = 0; + if(u32regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const u32regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? 
what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail | regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(u32regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + u32regex_token_iterator_implementation& operator=(const u32regex_token_iterator_implementation&); +}; + +template <class BidirectionalIterator> +class u32regex_token_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + sub_match<BidirectionalIterator>, + typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type, + const sub_match<BidirectionalIterator>*, + const sub_match<BidirectionalIterator>& > +#endif +{ +private: + typedef u32regex_token_iterator_implementation<BidirectionalIterator> impl; + typedef shared_ptr<impl> pimpl; +public: + typedef u32regex regex_type; + typedef sub_match<BidirectionalIterator> value_type; + typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_token_iterator(){} + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector<int>& submatches, 
match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#if BOOST_WORKAROUND(BOOST_MSVC, < 1300) + // can't reliably get this to work.... +#elif (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \ + || BOOST_WORKAROUND(__HP_aCC, < 60700) + template <class T> + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const T& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#else + template <std::size_t N> + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#endif + u32regex_token_iterator(const u32regex_token_iterator& that) + : pdata(that.pdata) {} + u32regex_token_iterator& operator=(const u32regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_token_iterator operator++(int) + { + u32regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && 
!pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_token_iterator<const char*> utf8regex_token_iterator; +typedef u32regex_token_iterator<const UChar*> utf16regex_token_iterator; +typedef u32regex_token_iterator<const UChar32*> utf32regex_token_iterator; + +// construction from an integral sub_match state_id: +inline u32regex_token_iterator<const char*> make_u32regex_token_iterator(const char* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const char*>(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const wchar_t*>(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m); +} +#endif +template <class charT, class Traits, class Alloc> +inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; + return u32regex_token_iterator<iter_type>(p.begin(), p.end(), e, submatch, m); // forward submatch too; passing only m would let the flags be taken as the sub-expression index +} +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UnicodeString& s, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = 
regex_constants::match_default) +{ + return u32regex_token_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) +// construction from a reference to an array: +template <std::size_t N> +inline u32regex_token_iterator<const char*> make_u32regex_token_iterator(const char* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const char*>(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +template <std::size_t N> +inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const wchar_t*>(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +template <std::size_t N> +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m); // forward the submatch array; it was previously dropped +} +#endif +template <class charT, class Traits, class Alloc, std::size_t N> +inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; + return u32regex_token_iterator<iter_type>(p.begin(), p.end(), e, submatch, m); // forward the submatch array; it was previously dropped +} +template <std::size_t N> +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = 
regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} +#endif // BOOST_MSVC < 1300 + +// construction from a vector of sub_match state_id's: +inline u32regex_token_iterator<const char*> make_u32regex_token_iterator(const char* p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const char*>(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator<const wchar_t*> make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const wchar_t*>(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UChar* p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator<const UChar*>(p, p+u_strlen(p), e, submatch, m); +} +#endif +template <class charT, class Traits, class Alloc> +inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; + return u32regex_token_iterator<iter_type>(p.begin(), p.end(), e, submatch, m); // forward the submatch vector; it was previously dropped +} +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UnicodeString& s, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = 
regex_constants::match_default) +{ + return u32regex_token_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +#if BOOST_WORKAROUND(BOOST_MSVC, == 1310) +# pragma warning(pop) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +} // namespace boost + +#endif // BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/ext/boost/regex/v4/w32_regex_traits.hpp b/ext/boost/regex/v4/w32_regex_traits.hpp new file mode 100644 index 0000000000..d55620726d --- /dev/null +++ b/ext/boost/regex/v4/w32_regex_traits.hpp @@ -0,0 +1,741 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE w32_regex_traits.hpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: Declares regular expression traits class w32_regex_traits. 
+ */ + +#ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_W32_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#include <boost/regex/pattern_except.hpp> +#endif +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#include <boost/regex/v4/regex_traits_defaults.hpp> +#endif +#ifdef BOOST_HAS_THREADS +#include <boost/regex/pending/static_mutex.hpp> +#endif +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#include <boost/regex/v4/primary_transform.hpp> +#endif +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#include <boost/regex/pending/object_cache.hpp> +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4786) +#pragma warning(disable:4800) +#endif + +namespace boost{ + +// +// forward declaration is needed by some compilers: +// +template <class charT> +class w32_regex_traits; + +namespace re_detail{ + +// +// start by typedeffing the types we'll need: +// +typedef ::boost::uint32_t lcid_type; // placeholder for LCID. +typedef ::boost::shared_ptr<void> cat_type; // placeholder for dll HANDLE. 
+ +// +// then add wrappers around the actual Win32 API's (ie implementation hiding): +// +BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale(); +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t, lcid_type); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type state_id); +#endif +#endif +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t, lcid_type); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type state_id); +#endif +#endif +BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name); +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string<unsigned short>& def); +#endif +#endif +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type state_id, const char* p1, const char* p2); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_transform(lcid_type state_id, const unsigned short* p1, const unsigned short* p2); +#endif +#endif +BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL 
w32_tolower(wchar_t c, lcid_type); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type state_id); +#endif +#endif +BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type); +#endif +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type, boost::uint32_t mask, char c); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type, boost::uint32_t mask, wchar_t c); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type state_id, boost::uint32_t m, unsigned short c); +#endif +#endif +// +// class w32_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template <class charT> +struct w32_regex_traits_base +{ + w32_regex_traits_base(lcid_type l) + { imbue(l); } + lcid_type imbue(lcid_type l); + + lcid_type m_locale; +}; + +template <class charT> +inline lcid_type w32_regex_traits_base<charT>::imbue(lcid_type l) +{ + lcid_type result(m_locale); + m_locale = l; + return result; +} + +// +// class w32_regex_traits_char_layer: +// implements methods that require specialisation for narrow characters: +// +template <class charT> +class w32_regex_traits_char_layer : public w32_regex_traits_base<charT> +{ + typedef std::basic_string<charT> string_type; + typedef std::map<charT, regex_constants::syntax_type> map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + w32_regex_traits_char_layer(const lcid_type l); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 
0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(::boost::re_detail::w32_is_lower(c, this->m_locale)) return regex_constants::escape_type_class; + if(::boost::re_detail::w32_is_upper(c, this->m_locale)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + charT tolower(charT c)const + { + return ::boost::re_detail::w32_tolower(c, this->m_locale); + } + bool isctype(boost::uint32_t mask, charT c)const + { + return ::boost::re_detail::w32_is(this->m_locale, mask, c); + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template <class charT> +w32_regex_traits_char_layer<charT>::w32_regex_traits_char_layer(::boost::re_detail::lcid_type l) + : w32_regex_traits_base<charT>(l) +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + cat_type cat; + std::string cat_name(w32_regex_traits<charT>::get_catalog_name()); + if(cat_name.size()) + { + cat = ::boost::re_detail::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::boost::re_detail::w32_cat_get(cat, this->m_locale, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + this->m_char_map[mss[j]] = i; + } + } + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + this->m_char_map[static_cast<charT>(*ptr)] = i; + ++ptr; + } + } + } 
+} + +template <class charT> +typename w32_regex_traits_char_layer<charT>::string_type + w32_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, static_cast<charT>(*ptr)); + ++ptr; + } + return result; +} + +// +// specialised version for narrow characters: +// +template <> +class BOOST_REGEX_DECL w32_regex_traits_char_layer<char> : public w32_regex_traits_base<char> +{ + typedef std::string string_type; +public: + w32_regex_traits_char_layer(::boost::re_detail::lcid_type l) + : w32_regex_traits_base<char>(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast<unsigned char>(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast<unsigned char>(c)]; + } + char tolower(char c)const + { + return m_lower_map[static_cast<unsigned char>(c)]; + } + bool isctype(boost::uint32_t mask, char c)const + { + return m_type_map[static_cast<unsigned char>(c)] & mask; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + char m_lower_map[1u << CHAR_BIT]; + boost::uint16_t m_type_map[1u << CHAR_BIT]; + void init(); +}; + +// +// class w32_regex_traits_implementation: +// provides pimpl implementation for w32_regex_traits. 
+// +template <class charT> +class w32_regex_traits_implementation : public w32_regex_traits_char_layer<charT> +{ +public: + typedef typename w32_regex_traits<charT>::char_class_type char_class_type; + BOOST_STATIC_CONSTANT(char_class_type, mask_word = 0x0400); // must be C1_DEFINED << 1 + BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 0x0800); // must be C1_DEFINED << 2 + BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 0x1000); // must be C1_DEFINED << 3 + BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 0x2000); // must be C1_DEFINED << 4 + BOOST_STATIC_CONSTANT(char_class_type, mask_base = 0x3ff); // all the masks used by the CT_CTYPE1 group + + typedef std::basic_string<charT> string_type; + typedef charT char_type; + w32_regex_traits_implementation(::boost::re_detail::lcid_type l); + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map<int, std::string>::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? 
std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + typedef typename string_type::size_type size_type; + string_type temp(p1, p2); + for(size_type i = 0; i < temp.size(); ++i) + temp[i] = this->tolower(temp[i]); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const + { + return ::boost::re_detail::w32_transform(this->m_locale, p1, p2); + } +private: + std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID + std::map<string_type, char_class_type> m_custom_class_names; // character class names + std::map<string_type, string_type> m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; +}; + +template <class charT> +typename w32_regex_traits_implementation<charT>::string_type + w32_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const +{ + string_type result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + typedef typename string_type::size_type size_type; + for(size_type i = 0; i < result.size(); ++i) + result[i] = this->tolower(result[i]); + result = 
this->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + if(result.empty()) + result = string_type(1, charT(0)); + return result; +} + +template <class charT> +typename w32_regex_traits_implementation<charT>::string_type + w32_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map<string_type, string_type>::const_iterator iter_type; + if(m_custom_collate_names.size()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + std::string name(p1, p2); +#else + std::string name; + const charT* p0 = p1; + while(p0 != p2) + name.append(1, char(*p0++)); +#endif + name = lookup_default_collate_name(name); +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + if(name.size()) + return string_type(name.begin(), name.end()); +#else + if(name.size()) + { + string_type result; + typedef std::string::const_iterator iter; + iter b = name.begin(); + iter e = name.end(); + while(b != e) + result.append(1, charT(*b++)); + return result; + } +#endif + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template <class charT> 
+w32_regex_traits_implementation<charT>::w32_regex_traits_implementation(::boost::re_detail::lcid_type l) +: w32_regex_traits_char_layer<charT>(l) +{ + cat_type cat; + std::string cat_name(w32_regex_traits<charT>::get_catalog_name()); + if(cat_name.size()) + { + cat = ::boost::re_detail::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + // + // Error messages: + // + for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0); + i <= boost::regex_constants::error_unknown; + i = static_cast<boost::regex_constants::error_type>(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, static_cast<charT>(*p)); + ++p; + } + string_type s = ::boost::re_detail::w32_cat_get(cat, this->m_locale, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, static_cast<char>(s[j])); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // + static const char_class_type masks[14] = + { + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + 0x0080u, // C1_XDIGIT + 0x0040u, // C1_BLANK + w32_regex_traits_implementation<charT>::mask_word, + w32_regex_traits_implementation<charT>::mask_unicode, + }; + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(::boost::re_detail::w32_cat_get(cat, this->m_locale, j+300, null_string)); + if(s.size()) + this->m_custom_class_names[s] = 
masks[j]; + } + } + // + // get the collation format used by m_pcollate: + // + m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim); +} + +template <class charT> +typename w32_regex_traits_implementation<charT>::char_class_type + w32_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const +{ + static const char_class_type masks[22] = + { + 0, + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0040u, // C1_BLANK + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK + w32_regex_traits_implementation<charT>::mask_horizontal, + 0x0002u, // C1_LOWER + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + w32_regex_traits_implementation<charT>::mask_unicode, + 0x0001u, // C1_UPPER + w32_regex_traits_implementation<charT>::mask_vertical, + 0x0104u | w32_regex_traits_implementation<charT>::mask_word, + 0x0104u | w32_regex_traits_implementation<charT>::mask_word, + 0x0080u, // C1_XDIGIT + }; + if(m_custom_class_names.size()) + { + typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2); + if(state_id < sizeof(masks) / sizeof(masks[0])) + return masks[state_id]; + return masks[0]; +} + + +template <class charT> +boost::shared_ptr<const w32_regex_traits_implementation<charT> > create_w32_regex_traits(::boost::re_detail::lcid_type l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) +{ + // TODO: create a cache for previously constructed objects. 
+ return boost::object_cache< ::boost::re_detail::lcid_type, w32_regex_traits_implementation<charT> >::get(l, 5); +} + +} // re_detail + +template <class charT> +class w32_regex_traits +{ +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string<char_type> string_type; + typedef ::boost::re_detail::lcid_type locale_type; + typedef boost::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + w32_regex_traits() + : m_pimpl(re_detail::create_w32_regex_traits<charT>(::boost::re_detail::w32_get_default_locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits<charT>::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? 
this->m_pimpl->tolower(c) : c; + } + charT tolower(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT toupper(charT c) const + { + return ::boost::re_detail::w32_toupper(c, this->m_pimpl->m_locale); + } + string_type transform(const charT* p1, const charT* p2) const + { + return ::boost::re_detail::w32_transform(this->m_pimpl->m_locale, p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { + if((f & re_detail::w32_regex_traits_implementation<charT>::mask_base) + && (this->m_pimpl->isctype(f & re_detail::w32_regex_traits_implementation<charT>::mask_base, c))) + return true; + else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c)) + return true; + else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_word) && (c == '_')) + return true; + else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_vertical) + && (::boost::re_detail::is_separator(c) || (c == '\v'))) + return true; + else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_horizontal) + && this->isctype(c, 0x0008u) && !this->isctype(c, re_detail::w32_regex_traits_implementation<charT>::mask_vertical)) + return true; + return false; + } + int toi(const charT*& p1, const charT* p2, int radix)const + { + return ::boost::re_detail::global_toi(p1, p2, radix, *this); + } + int value(charT c, int radix)const + { + int result = ::boost::re_detail::global_value(c); + return result < radix ? 
result : -1; + } + locale_type imbue(locale_type l) + { + ::boost::re_detail::lcid_type result(getloc()); + m_pimpl = re_detail::create_w32_regex_traits<charT>(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + boost::shared_ptr<const re_detail::w32_regex_traits_implementation<charT> > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static static_mutex& get_mutex_inst(); +#endif +}; + +template <class charT> +std::string w32_regex_traits<charT>::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template <class charT> +std::string& w32_regex_traits<charT>::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template <class charT> +std::string w32_regex_traits<charT>::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template <class charT> +static_mutex& w32_regex_traits<charT>::get_mutex_inst() +{ + static static_mutex s_mutex = BOOST_STATIC_MUTEX_INIT; + return s_mutex; +} +#endif + + +} // boost + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif |