module Data.String.Unicode
(
Unicode,
UString,
UTF8Char,
UTF8String,
UStringWithErrors,
DecodingFct,
DecodingFctEmbedErrors,
utf8ToUnicode
, utf8ToUnicodeEmbedErrors
, latin1ToUnicode
, ucs2ToUnicode
, ucs2BigEndianToUnicode
, ucs2LittleEndianToUnicode
, utf16beToUnicode
, utf16leToUnicode
, unicodeCharToUtf8
, unicodeToUtf8
, unicodeToXmlEntity
, unicodeToLatin1
, unicodeRemoveNoneAscii
, unicodeRemoveNoneLatin1
, intToCharRef
, intToCharRefHex
, intToHexString
, getDecodingFct
, getDecodingFctEmbedErrors
, getOutputEncodingFct
, normalizeNL
, guessEncoding
, getOutputEncodingFct'
, unicodeCharToUtf8'
, unicodeCharToXmlEntity'
, unicodeCharToLatin1'
)
where
import Data.Char (toUpper)
import Data.Char.IsoLatinTables
import Data.Char.Properties.XMLCharProps (isXml1ByteChar,
isXmlLatin1Char)
import Data.String.EncodingNames
import Data.String.UTF8Decoding (decodeUtf8,
decodeUtf8EmbedErrors)
type Unicode = Char
type UString = [Unicode]
type UTF8Char = Char
type UTF8String = String
type DecodingFct = String -> (UString, [String])
type UStringWithErrors = [Either String Char]
type DecodingFctEmbedErrors = String -> UStringWithErrors
unicodeToUtf8 :: UString -> UTF8String
unicodeToUtf8 :: UString -> UString
unicodeToUtf8 = (Unicode -> UString) -> UString -> UString
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Unicode -> UString
unicodeCharToUtf8
unicodeCharToUtf8 :: Unicode -> UTF8String
unicodeCharToUtf8 :: Unicode -> UString
unicodeCharToUtf8 Unicode
c
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0000007F
= [ Int -> Unicode
forall a. Enum a => Int -> a
toEnum Int
i ]
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00000080 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x000007FF
= [ Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xC0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
]
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00000800 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0000FFFF
= [ Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xE0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
]
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00010000 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x001FFFFF
= [ Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xF0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
]
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00200000 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x03FFFFFF
= [ Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xF8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000000)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
]
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x04000000 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7FFFFFFF
= [ Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xFC Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000000)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
, Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)
]
| Bool
otherwise
= UString -> UString
forall a. HasCallStack => UString -> a
error (UString
"unicodeCharToUtf8: illegal integer argument " UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ Int -> UString
forall a. Show a => a -> UString
show Int
i)
where
i :: Int
i = Unicode -> Int
forall a. Enum a => a -> Int
fromEnum Unicode
c
latin1ToUnicode :: String -> UString
latin1ToUnicode :: UString -> UString
latin1ToUnicode = UString -> UString
forall a. a -> a
id
latinToUnicode :: [(Char, Char)] -> String -> UString
latinToUnicode :: [(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
tt
= (Unicode -> Unicode) -> UString -> UString
forall a b. (a -> b) -> [a] -> [b]
map Unicode -> Unicode
charToUni
where
charToUni :: Unicode -> Unicode
charToUni Unicode
c =
((Unicode, Unicode) -> Unicode -> Unicode)
-> Unicode -> [(Unicode, Unicode)] -> Unicode
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Unicode
src,Unicode
dst) Unicode
r ->
case Unicode -> Unicode -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Unicode
c Unicode
src of
Ordering
EQ -> Unicode
dst
Ordering
LT -> Unicode
c
Ordering
GT -> Unicode
r) Unicode
c [(Unicode, Unicode)]
tt
decodeAscii :: DecodingFct
decodeAscii :: DecodingFct
decodeAscii
= ([UString], UString) -> (UString, [UString])
forall a b. (a, b) -> (b, a)
swap (([UString], UString) -> (UString, [UString]))
-> (UString -> ([UString], UString)) -> DecodingFct
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Either UString Unicode] -> ([UString], UString)
forall a b. [Either a b] -> ([a], [b])
partitionEither ([Either UString Unicode] -> ([UString], UString))
-> (UString -> [Either UString Unicode])
-> UString
-> ([UString], UString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UString -> [Either UString Unicode]
decodeAsciiEmbedErrors
decodeAsciiEmbedErrors :: String -> UStringWithErrors
decodeAsciiEmbedErrors :: UString -> [Either UString Unicode]
decodeAsciiEmbedErrors UString
str
= ((Unicode, Int) -> Either UString Unicode)
-> [(Unicode, Int)] -> [Either UString Unicode]
forall a b. (a -> b) -> [a] -> [b]
map (\(Unicode
c,Int
pos) -> if Unicode -> Bool
isValid Unicode
c
then Unicode -> Either UString Unicode
forall a b. b -> Either a b
Right Unicode
c
else UString -> Either UString Unicode
forall a b. a -> Either a b
Left (Unicode -> Int -> UString
forall a a. (Show a, Show a) => a -> a -> UString
toErrStr Unicode
c Int
pos)) [(Unicode, Int)]
posStr
where
posStr :: [(Unicode, Int)]
posStr = UString -> [Int] -> [(Unicode, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip UString
str [(Int
0::Int)..]
toErrStr :: a -> a -> UString
toErrStr a
errChr a
pos
= UString
" at input position " UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ a -> UString
forall a. Show a => a -> UString
show a
pos UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ UString
": none ASCII char " UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ a -> UString
forall a. Show a => a -> UString
show a
errChr
isValid :: Unicode -> Bool
isValid Unicode
x = Unicode
x Unicode -> Unicode -> Bool
forall a. Ord a => a -> a -> Bool
< Unicode
'\x80'
ucs2BigEndianToUnicode :: String -> UString
ucs2BigEndianToUnicode :: UString -> UString
ucs2BigEndianToUnicode (Unicode
b : Unicode
l : UString
r)
= Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Unicode -> Int
forall a. Enum a => a -> Int
fromEnum Unicode
b Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Unicode -> Int
forall a. Enum a => a -> Int
fromEnum Unicode
l) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
: UString -> UString
ucs2BigEndianToUnicode UString
r
ucs2BigEndianToUnicode []
= []
ucs2BigEndianToUnicode UString
_
= []
ucs2LittleEndianToUnicode :: String -> UString
ucs2LittleEndianToUnicode :: UString -> UString
ucs2LittleEndianToUnicode (Unicode
l : Unicode
b : UString
r)
= Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Unicode -> Int
forall a. Enum a => a -> Int
fromEnum Unicode
b Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Unicode -> Int
forall a. Enum a => a -> Int
fromEnum Unicode
l) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
: UString -> UString
ucs2LittleEndianToUnicode UString
r
ucs2LittleEndianToUnicode []
= []
ucs2LittleEndianToUnicode [Unicode
_]
= []
ucs2ToUnicode :: String -> UString
ucs2ToUnicode :: UString -> UString
ucs2ToUnicode (Unicode
'\xFE':Unicode
'\xFF':UString
s)
= UString -> UString
ucs2BigEndianToUnicode UString
s
ucs2ToUnicode (Unicode
'\xFF':Unicode
'\xFE':UString
s)
= UString -> UString
ucs2LittleEndianToUnicode UString
s
ucs2ToUnicode UString
s
= UString -> UString
ucs2BigEndianToUnicode UString
s
utf8ToUnicode :: DecodingFct
utf8ToUnicode :: DecodingFct
utf8ToUnicode (Unicode
'\xEF':Unicode
'\xBB':Unicode
'\xBF':UString
s)
= DecodingFct
decodeUtf8 UString
s
utf8ToUnicode UString
s
= DecodingFct
decodeUtf8 UString
s
utf8ToUnicodeEmbedErrors :: DecodingFctEmbedErrors
utf8ToUnicodeEmbedErrors :: UString -> [Either UString Unicode]
utf8ToUnicodeEmbedErrors (Unicode
'\xEF':Unicode
'\xBB':Unicode
'\xBF':UString
s)
= UString -> [Either UString Unicode]
decodeUtf8EmbedErrors UString
s
utf8ToUnicodeEmbedErrors UString
s
= UString -> [Either UString Unicode]
decodeUtf8EmbedErrors UString
s
utf16beToUnicode :: String -> UString
utf16beToUnicode :: UString -> UString
utf16beToUnicode (Unicode
'\xFE':Unicode
'\xFF':UString
s)
= UString -> UString
ucs2BigEndianToUnicode UString
s
utf16beToUnicode UString
s
= UString -> UString
ucs2BigEndianToUnicode UString
s
utf16leToUnicode :: String -> UString
utf16leToUnicode :: UString -> UString
utf16leToUnicode (Unicode
'\xFF':Unicode
'\xFE':UString
s)
= UString -> UString
ucs2LittleEndianToUnicode UString
s
utf16leToUnicode UString
s
= UString -> UString
ucs2LittleEndianToUnicode UString
s
unicodeToXmlEntity :: UString -> String
unicodeToXmlEntity :: UString -> UString
unicodeToXmlEntity
= (Unicode -> Bool) -> (Unicode -> UString) -> UString -> UString
escape Unicode -> Bool
isXml1ByteChar (Int -> UString
intToCharRef (Int -> UString) -> (Unicode -> Int) -> Unicode -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Unicode -> Int
forall a. Enum a => a -> Int
fromEnum)
unicodeToLatin1 :: UString -> String
unicodeToLatin1 :: UString -> UString
unicodeToLatin1
= (Unicode -> Bool) -> (Unicode -> UString) -> UString -> UString
escape Unicode -> Bool
isXmlLatin1Char (Int -> UString
intToCharRef (Int -> UString) -> (Unicode -> Int) -> Unicode -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Unicode -> Int
forall a. Enum a => a -> Int
fromEnum)
escape :: (Unicode -> Bool) -> (Unicode -> String) -> UString -> String
escape :: (Unicode -> Bool) -> (Unicode -> UString) -> UString -> UString
escape Unicode -> Bool
check Unicode -> UString
esc =
(Unicode -> UString) -> UString -> UString
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Unicode
uc -> if Unicode -> Bool
check Unicode
uc then [Unicode
uc] else Unicode -> UString
esc Unicode
uc)
unicodeRemoveNoneAscii :: UString -> String
unicodeRemoveNoneAscii :: UString -> UString
unicodeRemoveNoneAscii
= (Unicode -> Bool) -> UString -> UString
forall a. (a -> Bool) -> [a] -> [a]
filter Unicode -> Bool
isXml1ByteChar
unicodeRemoveNoneLatin1 :: UString -> String
unicodeRemoveNoneLatin1 :: UString -> UString
unicodeRemoveNoneLatin1
= (Unicode -> Bool) -> UString -> UString
forall a. (a -> Bool) -> [a] -> [a]
filter Unicode -> Bool
isXmlLatin1Char
intToCharRef :: Int -> String
intToCharRef :: Int -> UString
intToCharRef Int
i
= UString
"&#" UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ Int -> UString
forall a. Show a => a -> UString
show Int
i UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ UString
";"
intToCharRefHex :: Int -> String
intToCharRefHex :: Int -> UString
intToCharRefHex Int
i
= UString
"&#x" UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ UString
h2 UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ UString
";"
where
h1 :: UString
h1 = Int -> UString
intToHexString Int
i
h2 :: UString
h2 = if UString -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length UString
h1 Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
2 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
then Unicode
'0'Unicode -> UString -> UString
forall a. a -> [a] -> [a]
: UString
h1
else UString
h1
intToHexString :: Int -> String
intToHexString :: Int -> UString
intToHexString Int
i
| Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
= UString
"0"
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
= Int -> UString
intToStr Int
i
| Bool
otherwise
= UString -> UString
forall a. HasCallStack => UString -> a
error (UString
"intToHexString: negative argument " UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ Int -> UString
forall a. Show a => a -> UString
show Int
i)
where
intToStr :: Int -> UString
intToStr Int
0 = UString
""
intToStr Int
i' = Int -> UString
intToStr (Int
i' Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
16) UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ [Int -> Unicode
fourBitsToChar (Int
i' Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
16)]
fourBitsToChar :: Int -> Char
fourBitsToChar :: Int -> Unicode
fourBitsToChar Int
i = UString
"0123456789ABCDEF" UString -> Int -> Unicode
forall a. [a] -> Int -> a
!! Int
i
{-# INLINE fourBitsToChar #-}
normalizeNL :: String -> String
normalizeNL :: UString -> UString
normalizeNL (Unicode
'\r' : Unicode
'\n' : UString
rest) = Unicode
'\n' Unicode -> UString -> UString
forall a. a -> [a] -> [a]
: UString -> UString
normalizeNL UString
rest
normalizeNL (Unicode
'\r' : UString
rest) = Unicode
'\n' Unicode -> UString -> UString
forall a. a -> [a] -> [a]
: UString -> UString
normalizeNL UString
rest
normalizeNL (Unicode
c : UString
rest) = Unicode
c Unicode -> UString -> UString
forall a. a -> [a] -> [a]
: UString -> UString
normalizeNL UString
rest
normalizeNL [] = []
decodingTable :: [(String, DecodingFct)]
decodingTable :: [(UString, DecodingFct)]
decodingTable
= [ (UString
utf8, DecodingFct
utf8ToUnicode )
, (UString
isoLatin1, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
latin1ToUnicode )
, (UString
usAscii, DecodingFct
decodeAscii )
, (UString
ucs2, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
ucs2ToUnicode )
, (UString
utf16, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
ucs2ToUnicode )
, (UString
utf16be, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
utf16beToUnicode )
, (UString
utf16le, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
utf16leToUnicode )
, (UString
iso8859_2, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_2) )
, (UString
iso8859_3, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_3) )
, (UString
iso8859_4, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_4) )
, (UString
iso8859_5, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_5) )
, (UString
iso8859_6, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_6) )
, (UString
iso8859_7, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_7) )
, (UString
iso8859_8, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_8) )
, (UString
iso8859_9, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_9) )
, (UString
iso8859_10, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_10) )
, (UString
iso8859_11, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_11) )
, (UString
iso8859_13, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_13) )
, (UString
iso8859_14, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_14) )
, (UString
iso8859_15, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_15) )
, (UString
iso8859_16, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_16) )
, (UString
unicodeString, (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
forall a. a -> a
id )
, (UString
"", (UString -> UString) -> DecodingFct
forall t a a. (t -> a) -> t -> (a, [a])
liftDecFct UString -> UString
forall a. a -> a
id )
]
where
liftDecFct :: (t -> a) -> t -> (a, [a])
liftDecFct t -> a
df = \ t
s -> (t -> a
df t
s, [])
getDecodingFct :: String -> Maybe DecodingFct
getDecodingFct :: UString -> Maybe DecodingFct
getDecodingFct UString
enc
= UString -> [(UString, DecodingFct)] -> Maybe DecodingFct
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup ((Unicode -> Unicode) -> UString -> UString
forall a b. (a -> b) -> [a] -> [b]
map Unicode -> Unicode
toUpper UString
enc) [(UString, DecodingFct)]
decodingTable
decodingTableEmbedErrors :: [(String, DecodingFctEmbedErrors)]
decodingTableEmbedErrors :: [(UString, UString -> [Either UString Unicode])]
decodingTableEmbedErrors
= [ (UString
utf8, UString -> [Either UString Unicode]
utf8ToUnicodeEmbedErrors )
, (UString
isoLatin1, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
latin1ToUnicode )
, (UString
usAscii, UString -> [Either UString Unicode]
decodeAsciiEmbedErrors )
, (UString
ucs2, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
ucs2ToUnicode )
, (UString
utf16, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
ucs2ToUnicode )
, (UString
utf16be, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
utf16beToUnicode )
, (UString
utf16le, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
utf16leToUnicode )
, (UString
iso8859_2, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_2) )
, (UString
iso8859_3, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_3) )
, (UString
iso8859_4, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_4) )
, (UString
iso8859_5, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_5) )
, (UString
iso8859_6, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_6) )
, (UString
iso8859_7, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_7) )
, (UString
iso8859_8, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_8) )
, (UString
iso8859_9, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_9) )
, (UString
iso8859_10, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_10) )
, (UString
iso8859_11, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_11) )
, (UString
iso8859_13, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_13) )
, (UString
iso8859_14, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_14) )
, (UString
iso8859_15, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_15) )
, (UString
iso8859_16, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct ([(Unicode, Unicode)] -> UString -> UString
latinToUnicode [(Unicode, Unicode)]
iso_8859_16) )
, (UString
unicodeString, (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
forall a. a -> a
id )
, (UString
"", (UString -> UString) -> UString -> [Either UString Unicode]
forall a b a. (a -> [b]) -> a -> [Either a b]
liftDecFct UString -> UString
forall a. a -> a
id )
]
where
liftDecFct :: (a -> [b]) -> a -> [Either a b]
liftDecFct a -> [b]
df = (b -> Either a b) -> [b] -> [Either a b]
forall a b. (a -> b) -> [a] -> [b]
map b -> Either a b
forall a b. b -> Either a b
Right ([b] -> [Either a b]) -> (a -> [b]) -> a -> [Either a b]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [b]
df
getDecodingFctEmbedErrors :: String -> Maybe DecodingFctEmbedErrors
getDecodingFctEmbedErrors :: UString -> Maybe (UString -> [Either UString Unicode])
getDecodingFctEmbedErrors UString
enc
= UString
-> [(UString, UString -> [Either UString Unicode])]
-> Maybe (UString -> [Either UString Unicode])
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup ((Unicode -> Unicode) -> UString -> UString
forall a b. (a -> b) -> [a] -> [b]
map Unicode -> Unicode
toUpper UString
enc) [(UString, UString -> [Either UString Unicode])]
decodingTableEmbedErrors
outputEncodingTable :: [(String, (UString -> String))]
outputEncodingTable :: [(UString, UString -> UString)]
outputEncodingTable
= [ (UString
utf8, UString -> UString
unicodeToUtf8 )
, (UString
isoLatin1, UString -> UString
unicodeToLatin1 )
, (UString
usAscii, UString -> UString
unicodeToXmlEntity )
, (UString
unicodeString, UString -> UString
forall a. a -> a
id )
, (UString
"", UString -> UString
unicodeToUtf8 )
]
getOutputEncodingFct :: String -> Maybe (String -> UString)
getOutputEncodingFct :: UString -> Maybe (UString -> UString)
getOutputEncodingFct UString
enc
= UString
-> [(UString, UString -> UString)] -> Maybe (UString -> UString)
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup ((Unicode -> Unicode) -> UString -> UString
forall a b. (a -> b) -> [a] -> [b]
map Unicode -> Unicode
toUpper UString
enc) [(UString, UString -> UString)]
outputEncodingTable
guessEncoding :: String -> String
guessEncoding :: UString -> UString
guessEncoding (Unicode
'\xFF':Unicode
'\xFE':Unicode
'\x00':Unicode
'\x00':UString
_) = UString
"UCS-4LE"
guessEncoding (Unicode
'\xFF':Unicode
'\xFE':UString
_) = UString
"UTF-16LE"
guessEncoding (Unicode
'\xFE':Unicode
'\xFF':Unicode
'\x00':Unicode
'\x00':UString
_) = UString
"UCS-4-3421"
guessEncoding (Unicode
'\xFE':Unicode
'\xFF':UString
_) = UString
"UTF-16BE"
guessEncoding (Unicode
'\xEF':Unicode
'\xBB':Unicode
'\xBF':UString
_) = UString
utf8
guessEncoding (Unicode
'\x00':Unicode
'\x00':Unicode
'\xFE':Unicode
'\xFF':UString
_) = UString
"UCS-4BE"
guessEncoding (Unicode
'\x00':Unicode
'\x00':Unicode
'\xFF':Unicode
'\xFE':UString
_) = UString
"UCS-4-2143"
guessEncoding (Unicode
'\x00':Unicode
'\x00':Unicode
'\x00':Unicode
'\x3C':UString
_) = UString
"UCS-4BE"
guessEncoding (Unicode
'\x3C':Unicode
'\x00':Unicode
'\x00':Unicode
'\x00':UString
_) = UString
"UCS-4LE"
guessEncoding (Unicode
'\x00':Unicode
'\x00':Unicode
'\x3C':Unicode
'\x00':UString
_) = UString
"UCS-4-2143"
guessEncoding (Unicode
'\x00':Unicode
'\x3C':Unicode
'\x00':Unicode
'\x00':UString
_) = UString
"UCS-4-3412"
guessEncoding (Unicode
'\x00':Unicode
'\x3C':Unicode
'\x00':Unicode
'\x3F':UString
_) = UString
"UTF-16BE"
guessEncoding (Unicode
'\x3C':Unicode
'\x00':Unicode
'\x3F':Unicode
'\x00':UString
_) = UString
"UTF-16LE"
guessEncoding (Unicode
'\x4C':Unicode
'\x6F':Unicode
'\xA7':Unicode
'\x94':UString
_) = UString
"EBCDIC"
guessEncoding UString
_ = UString
""
swap :: (a,b) -> (b,a)
swap :: (a, b) -> (b, a)
swap (a
x,b
y) = (b
y,a
x)
{-# INLINE swap #-}
partitionEither :: [Either a b] -> ([a], [b])
partitionEither :: [Either a b] -> ([a], [b])
partitionEither =
(Either a b -> ([a], [b]) -> ([a], [b]))
-> ([a], [b]) -> [Either a b] -> ([a], [b])
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Either a b
x ~([a]
ls,[b]
rs) -> (a -> ([a], [b])) -> (b -> ([a], [b])) -> Either a b -> ([a], [b])
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\a
l -> (a
la -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
ls,[b]
rs)) (\b
r -> ([a]
ls,b
rb -> [b] -> [b]
forall a. a -> [a] -> [a]
:[b]
rs)) Either a b
x) ([],[])
{-# INLINE partitionEither #-}
type StringFct = String -> String
outputEncodingTable' :: [(String, (Char -> StringFct))]
outputEncodingTable' :: [(UString, Unicode -> UString -> UString)]
outputEncodingTable'
= [ (UString
utf8, Unicode -> UString -> UString
unicodeCharToUtf8' )
, (UString
isoLatin1, Unicode -> UString -> UString
unicodeCharToLatin1' )
, (UString
usAscii, Unicode -> UString -> UString
unicodeCharToXmlEntity' )
, (UString
"", Unicode -> UString -> UString
unicodeCharToUtf8' )
]
getOutputEncodingFct' :: String -> Maybe (Char -> StringFct)
getOutputEncodingFct' :: UString -> Maybe (Unicode -> UString -> UString)
getOutputEncodingFct' UString
enc
= UString
-> [(UString, Unicode -> UString -> UString)]
-> Maybe (Unicode -> UString -> UString)
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup ((Unicode -> Unicode) -> UString -> UString
forall a b. (a -> b) -> [a] -> [b]
map Unicode -> Unicode
toUpper UString
enc) [(UString, Unicode -> UString -> UString)]
outputEncodingTable'
unicodeCharToUtf8' :: Char -> StringFct
unicodeCharToUtf8' :: Unicode -> UString -> UString
unicodeCharToUtf8' Unicode
c
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0000007F
= (Unicode
c Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00000080 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x000007FF
= ((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xC0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) ) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00000800 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0000FFFF
= ((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xE0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) ) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00010000 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x001FFFFF
= ((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xF0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000) ) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x00200000 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x03FFFFFF
= ((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xF8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000000) ) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x04000000 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7FFFFFFF
= ((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0xFC Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000000) ) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x1000) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
0x40) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:) (UString -> UString) -> (UString -> UString) -> UString -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
((Int -> Unicode
forall a. Enum a => Int -> a
toEnum (Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
0x40)) Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Bool
otherwise
= UString -> UString -> UString
forall a. HasCallStack => UString -> a
error (UString
"unicodeCharToUtf8: illegal integer argument " UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++ Int -> UString
forall a. Show a => a -> UString
show Int
i)
where
i :: Int
i = Unicode -> Int
forall a. Enum a => a -> Int
fromEnum Unicode
c
unicodeCharToXmlEntity' :: Char -> StringFct
unicodeCharToXmlEntity' :: Unicode -> UString -> UString
unicodeCharToXmlEntity' Unicode
c
| Unicode -> Bool
isXml1ByteChar Unicode
c = (Unicode
c Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Bool
otherwise = ((Int -> UString
intToCharRef (Int -> UString) -> (Unicode -> Int) -> Unicode -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Unicode -> Int
forall a. Enum a => a -> Int
fromEnum (Unicode -> UString) -> Unicode -> UString
forall a b. (a -> b) -> a -> b
$ Unicode
c) UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++)
unicodeCharToLatin1' :: Char -> StringFct
unicodeCharToLatin1' :: Unicode -> UString -> UString
unicodeCharToLatin1' Unicode
c
| Unicode -> Bool
isXmlLatin1Char Unicode
c = (Unicode
c Unicode -> UString -> UString
forall a. a -> [a] -> [a]
:)
| Bool
otherwise = ((Int -> UString
intToCharRef (Int -> UString) -> (Unicode -> Int) -> Unicode -> UString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Unicode -> Int
forall a. Enum a => a -> Int
fromEnum (Unicode -> UString) -> Unicode -> UString
forall a b. (a -> b) -> a -> b
$ Unicode
c) UString -> UString -> UString
forall a. [a] -> [a] -> [a]
++)