{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 701
{-# LANGUAGE Trustworthy #-}
#endif
module Data.ByteString.UTF8
( B.ByteString
, decode
, replacement_char
, uncons
, splitAt
, take
, drop
, span
, break
, fromChar
, fromString
, toString
, foldl
, foldr
, length
, lines
, lines'
) where
import Data.Bits
import Data.Word
import qualified Data.ByteString as B
import Prelude hiding (take,drop,splitAt,span,break,foldr,foldl,length,lines)
import Codec.Binary.UTF8.String(encode)
import Codec.Binary.UTF8.Generic (buncons)
fromChar :: Char -> B.ByteString
fromChar :: Char -> ByteString
fromChar x :: Char
x = String -> ByteString
fromString [Char
x]
fromString :: String -> B.ByteString
fromString :: String -> ByteString
fromString xs :: String
xs = [Word8] -> ByteString
B.pack (String -> [Word8]
encode String
xs)
toString :: B.ByteString -> String
toString :: ByteString -> String
toString bs :: ByteString
bs = (Char -> String -> String) -> String -> ByteString -> String
forall a. (Char -> a -> a) -> a -> ByteString -> a
foldr (:) [] ByteString
bs
replacement_char :: Char
replacement_char :: Char
replacement_char = '\xfffd'
decode :: B.ByteString -> Maybe (Char,Int)
decode :: ByteString -> Maybe (Char, Int)
decode bs :: ByteString
bs = do (c :: Word8
c,cs :: ByteString
cs) <- ByteString -> Maybe (Word8, ByteString)
forall b s. UTF8Bytes b s => b -> Maybe (Word8, b)
buncons ByteString
bs
(Char, Int) -> Maybe (Char, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ByteString -> (Char, Int)
choose (Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
c) ByteString
cs)
where
choose :: Int -> B.ByteString -> (Char, Int)
choose :: Int -> ByteString -> (Char, Int)
choose c :: Int
c cs :: ByteString
cs
| Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 = (Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Int -> Int
forall a. Enum a => a -> Int
fromEnum Int
c, 1)
| Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xc0 = (Char
replacement_char, 1)
| Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xe0 = Int -> ByteString -> (Char, Int)
bytes2 (Int -> Int -> Int
mask Int
c 0x1f) ByteString
cs
| Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xf0 = Int -> ByteString -> (Char, Int)
bytes3 (Int -> Int -> Int
mask Int
c 0x0f) ByteString
cs
| Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xf8 = Int -> ByteString -> (Char, Int)
bytes4 (Int -> Int -> Int
mask Int
c 0x07) ByteString
cs
| Bool
otherwise = (Char
replacement_char, 1)
mask :: Int -> Int -> Int
mask :: Int -> Int -> Int
mask c :: Int
c m :: Int
m = Int -> Int
forall a. Enum a => a -> Int
fromEnum (Int
c Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
m)
combine :: Int -> Word8 -> Int
combine :: Int -> Word8 -> Int
combine acc :: Int
acc r :: Word8
r = Int -> Int -> Int
forall a. Bits a => a -> Int -> a
shiftL Int
acc 6 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Word8 -> Int
forall a. Enum a => a -> Int
fromEnum (Word8
r Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. 0x3f)
follower :: Int -> Word8 -> Maybe Int
follower :: Int -> Word8 -> Maybe Int
follower acc :: Int
acc r :: Word8
r | Word8
r Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. 0xc0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== 0x80 = Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Word8 -> Int
combine Int
acc Word8
r)
follower _ _ = Maybe Int
forall a. Maybe a
Nothing
{-# INLINE get_follower #-}
get_follower :: Int -> B.ByteString -> Maybe (Int, B.ByteString)
get_follower :: Int -> ByteString -> Maybe (Int, ByteString)
get_follower acc :: Int
acc cs :: ByteString
cs = do (x :: Word8
x,xs :: ByteString
xs) <- ByteString -> Maybe (Word8, ByteString)
forall b s. UTF8Bytes b s => b -> Maybe (Word8, b)
buncons ByteString
cs
Int
acc1 <- Int -> Word8 -> Maybe Int
follower Int
acc Word8
x
(Int, ByteString) -> Maybe (Int, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
acc1,ByteString
xs)
bytes2 :: Int -> B.ByteString -> (Char, Int)
bytes2 :: Int -> ByteString -> (Char, Int)
bytes2 c :: Int
c cs :: ByteString
cs = case Int -> ByteString -> Maybe (Int, ByteString)
get_follower Int
c ByteString
cs of
Just (d :: Int
d, _) | Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x80 -> (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
d, 2)
| Bool
otherwise -> (Char
replacement_char, 1)
_ -> (Char
replacement_char, 1)
bytes3 :: Int -> B.ByteString -> (Char, Int)
bytes3 :: Int -> ByteString -> (Char, Int)
bytes3 c :: Int
c cs :: ByteString
cs =
case Int -> ByteString -> Maybe (Int, ByteString)
get_follower Int
c ByteString
cs of
Just (d1 :: Int
d1, cs1 :: ByteString
cs1) ->
case Int -> ByteString -> Maybe (Int, ByteString)
get_follower Int
d1 ByteString
cs1 of
Just (d :: Int
d, _) | (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x800 Bool -> Bool -> Bool
&& Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xd800) Bool -> Bool -> Bool
||
(Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0xdfff Bool -> Bool -> Bool
&& Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xfffe) -> (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
d, 3)
| Bool
otherwise -> (Char
replacement_char, 3)
_ -> (Char
replacement_char, 2)
_ -> (Char
replacement_char, 1)
bytes4 :: Int -> B.ByteString -> (Char, Int)
bytes4 :: Int -> ByteString -> (Char, Int)
bytes4 c :: Int
c cs :: ByteString
cs =
case Int -> ByteString -> Maybe (Int, ByteString)
get_follower Int
c ByteString
cs of
Just (d1 :: Int
d1, cs1 :: ByteString
cs1) ->
case Int -> ByteString -> Maybe (Int, ByteString)
get_follower Int
d1 ByteString
cs1 of
Just (d2 :: Int
d2, cs2 :: ByteString
cs2) ->
case Int -> ByteString -> Maybe (Int, ByteString)
get_follower Int
d2 ByteString
cs2 of
Just (d :: Int
d,_) | Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x10000 Bool -> Bool -> Bool
&& Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0x110000 -> (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
d, 4)
| Bool
otherwise -> (Char
replacement_char, 4)
_ -> (Char
replacement_char, 3)
_ -> (Char
replacement_char, 2)
_ -> (Char
replacement_char, 1)
splitAt :: Int -> B.ByteString -> (B.ByteString,B.ByteString)
splitAt :: Int -> ByteString -> (ByteString, ByteString)
splitAt x :: Int
x bs :: ByteString
bs = Int -> Int -> ByteString -> (ByteString, ByteString)
forall t.
(Ord t, Num t) =>
Int -> t -> ByteString -> (ByteString, ByteString)
loop 0 Int
x ByteString
bs
where loop :: Int -> t -> ByteString -> (ByteString, ByteString)
loop a :: Int
a n :: t
n _ | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
<= 0 = Int -> ByteString -> (ByteString, ByteString)
B.splitAt Int
a ByteString
bs
loop a :: Int
a n :: t
n bs1 :: ByteString
bs1 = case ByteString -> Maybe (Char, Int)
decode ByteString
bs1 of
Just (_,y :: Int
y) -> Int -> t -> ByteString -> (ByteString, ByteString)
loop (Int
aInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
y) (t
nt -> t -> t
forall a. Num a => a -> a -> a
-1) (Int -> ByteString -> ByteString
B.drop Int
y ByteString
bs1)
Nothing -> (ByteString
bs, ByteString
B.empty)
take :: Int -> B.ByteString -> B.ByteString
take :: Int -> ByteString -> ByteString
take n :: Int
n bs :: ByteString
bs = (ByteString, ByteString) -> ByteString
forall a b. (a, b) -> a
fst (Int -> ByteString -> (ByteString, ByteString)
splitAt Int
n ByteString
bs)
drop :: Int -> B.ByteString -> B.ByteString
drop :: Int -> ByteString -> ByteString
drop n :: Int
n bs :: ByteString
bs = (ByteString, ByteString) -> ByteString
forall a b. (a, b) -> b
snd (Int -> ByteString -> (ByteString, ByteString)
splitAt Int
n ByteString
bs)
span :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span p :: Char -> Bool
p bs :: ByteString
bs = Int -> ByteString -> (ByteString, ByteString)
loop 0 ByteString
bs
where loop :: Int -> ByteString -> (ByteString, ByteString)
loop a :: Int
a cs :: ByteString
cs = case ByteString -> Maybe (Char, Int)
decode ByteString
cs of
Just (c :: Char
c,n :: Int
n) | Char -> Bool
p Char
c -> Int -> ByteString -> (ByteString, ByteString)
loop (Int
aInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
n) (Int -> ByteString -> ByteString
B.drop Int
n ByteString
cs)
_ -> Int -> ByteString -> (ByteString, ByteString)
B.splitAt Int
a ByteString
bs
break :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
break p :: Char -> Bool
p bs :: ByteString
bs = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ByteString
bs
uncons :: B.ByteString -> Maybe (Char,B.ByteString)
uncons :: ByteString -> Maybe (Char, ByteString)
uncons bs :: ByteString
bs = do (c :: Char
c,n :: Int
n) <- ByteString -> Maybe (Char, Int)
decode ByteString
bs
(Char, ByteString) -> Maybe (Char, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
c, Int -> ByteString -> ByteString
B.drop Int
n ByteString
bs)
foldr :: (Char -> a -> a) -> a -> B.ByteString -> a
foldr :: (Char -> a -> a) -> a -> ByteString -> a
foldr cons :: Char -> a -> a
cons nil :: a
nil cs :: ByteString
cs = case ByteString -> Maybe (Char, ByteString)
uncons ByteString
cs of
Just (a :: Char
a,as :: ByteString
as) -> Char -> a -> a
cons Char
a ((Char -> a -> a) -> a -> ByteString -> a
forall a. (Char -> a -> a) -> a -> ByteString -> a
foldr Char -> a -> a
cons a
nil ByteString
as)
Nothing -> a
nil
foldl :: (a -> Char -> a) -> a -> B.ByteString -> a
foldl :: (a -> Char -> a) -> a -> ByteString -> a
foldl add :: a -> Char -> a
add acc :: a
acc cs :: ByteString
cs = case ByteString -> Maybe (Char, ByteString)
uncons ByteString
cs of
Just (a :: Char
a,as :: ByteString
as) -> let v :: a
v = a -> Char -> a
add a
acc Char
a
in a -> a -> a
forall a b. a -> b -> b
seq a
v ((a -> Char -> a) -> a -> ByteString -> a
forall a. (a -> Char -> a) -> a -> ByteString -> a
foldl a -> Char -> a
add a
v ByteString
as)
Nothing -> a
acc
length :: B.ByteString -> Int
length :: ByteString -> Int
length b :: ByteString
b = Int -> ByteString -> Int
forall p. Num p => p -> ByteString -> p
loop 0 ByteString
b
where loop :: p -> ByteString -> p
loop n :: p
n xs :: ByteString
xs = case ByteString -> Maybe (Char, Int)
decode ByteString
xs of
Just (_,m :: Int
m) -> p -> ByteString -> p
loop (p
np -> p -> p
forall a. Num a => a -> a -> a
+1) (Int -> ByteString -> ByteString
B.drop Int
m ByteString
xs)
Nothing -> p
n
lines :: B.ByteString -> [B.ByteString]
lines :: ByteString -> [ByteString]
lines bs :: ByteString
bs | ByteString -> Bool
B.null ByteString
bs = []
lines bs :: ByteString
bs = case Word8 -> ByteString -> Maybe Int
B.elemIndex 10 ByteString
bs of
Just x :: Int
x -> let (xs :: ByteString
xs,ys :: ByteString
ys) = Int -> ByteString -> (ByteString, ByteString)
B.splitAt Int
x ByteString
bs
in ByteString
xs ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
lines (ByteString -> ByteString
B.tail ByteString
ys)
Nothing -> [ByteString
bs]
lines' :: B.ByteString -> [B.ByteString]
lines' :: ByteString -> [ByteString]
lines' bs :: ByteString
bs | ByteString -> Bool
B.null ByteString
bs = []
lines' bs :: ByteString
bs = case Word8 -> ByteString -> Maybe Int
B.elemIndex 10 ByteString
bs of
Just x :: Int
x -> let (xs :: ByteString
xs,ys :: ByteString
ys) = Int -> ByteString -> (ByteString, ByteString)
B.splitAt (Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) ByteString
bs
in ByteString
xs ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
lines' ByteString
ys
Nothing -> [ByteString
bs]