1616
1717use mem;
1818use char;
19+ use char:: Char ;
1920use clone:: Clone ;
2021use cmp;
2122use cmp:: { PartialEq , Eq } ;
@@ -24,7 +25,7 @@ use default::Default;
2425use iter:: { Filter , Map , Iterator } ;
2526use iter:: { DoubleEndedIterator , ExactSize } ;
2627use iter:: range;
27- use num:: Saturating ;
28+ use num:: { CheckedMul , Saturating } ;
2829use option:: { None , Option , Some } ;
2930use raw:: Repr ;
3031use slice:: ImmutableVector ;
@@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
557558 }
558559}
559560
561+ /// External iterator for a string's UTF16 codeunits.
562+ /// Use with the `std::iter` module.
563+ #[ deriving( Clone ) ]
564+ pub struct Utf16CodeUnits < ' a > {
565+ chars : Chars < ' a > ,
566+ extra : u16
567+ }
568+
569+ impl < ' a > Iterator < u16 > for Utf16CodeUnits < ' a > {
570+ #[ inline]
571+ fn next ( & mut self ) -> Option < u16 > {
572+ if self . extra != 0 {
573+ let tmp = self . extra ;
574+ self . extra = 0 ;
575+ return Some ( tmp) ;
576+ }
577+
578+ let mut buf = [ 0u16 , ..2 ] ;
579+ self . chars . next ( ) . map ( |ch| {
580+ let n = ch. encode_utf16 ( buf /* as mut slice! */ ) ;
581+ if n == 2 { self . extra = buf[ 1 ] ; }
582+ buf[ 0 ]
583+ } )
584+ }
585+
586+ #[ inline]
587+ fn size_hint ( & self ) -> ( uint , Option < uint > ) {
588+ let ( low, high) = self . chars . size_hint ( ) ;
589+ // every char gets either one u16 or two u16,
590+ // so this iterator is between 1 or 2 times as
591+ // long as the underlying iterator.
592+ ( low, high. and_then ( |n| n. checked_mul ( & 2 ) ) )
593+ }
594+ }
595+
560596/*
561597Section: Comparing strings
562598*/
@@ -1619,6 +1655,9 @@ pub trait StrSlice<'a> {
16191655 /// and that it is not reallocated (e.g. by pushing to the
16201656 /// string).
16211657 fn as_ptr ( & self ) -> * const u8 ;
1658+
1659+ /// Returns an iterator over the `u16` code units of the string encoded as UTF-16.
///
/// The units are produced lazily, in string order; a character that
/// encodes to two units contributes both, first unit first.
1660+ fn utf16_units ( & self ) -> Utf16CodeUnits < ' a > ;
16221661}
16231662
16241663impl < ' a > StrSlice < ' a > for & ' a str {
@@ -1967,6 +2006,11 @@ impl<'a> StrSlice<'a> for &'a str {
19672006 fn as_ptr ( & self ) -> * const u8 {
19682007 self . repr ( ) . data
19692008 }
2009+
2010+ #[ inline]
2011+ fn utf16_units ( & self ) -> Utf16CodeUnits < ' a > {
2012+ Utf16CodeUnits { chars : self . chars ( ) , extra : 0 }
2013+ }
19702014}
19712015
19722016impl < ' a > Default for & ' a str {
0 commit comments