--------------------------------------------------------------------
-- |
-- Module    : Text.Regex.Applicative.Text
-- Copyright : (c) 2015 Oleg Grenrus
-- License   : BSD3
--
-- Maintainer: Oleg Grenrus <oleg.grenrus@iki.fi>
-- Stability : experimental
--
-- @Text.Regex.Applicative@ API specialised to 'Char' and 'Text'.
--------------------------------------------------------------------
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >=704
{-# LANGUAGE Safe #-}
#endif
module Text.Regex.Applicative.Text
  (
  -- * Types
    RE'
  , R.RE
  -- * Smart constructors
  , sym
  , psym
  , msym
  , anySym
  , string
  , reFoldl
  , R.Greediness(..)
  , few
  , withMatched
  -- * Basic matchers
  , match
  , (=~)
  , replace
  -- * Advanced matchers
  , findFirstPrefix
  , findLongestPrefix
  , findShortestPrefix
  , findFirstInfix
  , findLongestInfix
  , findShortestInfix
  -- * Module re-exports
  , module Control.Applicative
  ) where

import           Control.Applicative
import           Control.Arrow
import           Data.Monoid (mappend)
import           Data.Text (Text)
import qualified Data.Text as T
import qualified Text.Regex.Applicative as R

-- | Convenience alias for 'RE' working (also) on 'Text'.
type RE' a = R.RE Char a

-- | Match and return a single 'Char' which satisfies the predicate
psym :: (Char -> Bool) -> RE' Char
psym :: (Char -> Bool) -> RE' Char
psym = (Char -> Bool) -> RE' Char
forall s. (s -> Bool) -> RE s s
R.psym

-- | Like 'psym', but allows to return a computed value instead of the
-- original symbol
msym :: (Char -> Maybe a) -> RE' a
msym :: (Char -> Maybe a) -> RE' a
msym = (Char -> Maybe a) -> RE' a
forall s a. (s -> Maybe a) -> RE s a
R.msym

-- | Match and return the given symbol
sym :: Char -> RE' Char
sym :: Char -> RE' Char
sym = Char -> RE' Char
forall s. Eq s => s -> RE s s
R.sym

-- | Match and return any single symbol
anySym :: RE' Char
anySym :: RE' Char
anySym = RE' Char
forall s. RE s s
R.anySym

-- | Match and return the given 'Text'.
--
--
-- > import Text.Regex.Applicative
-- >
-- > number = string "one" *> pure 1 <|> string "two" *> pure 2
-- >
-- > main = print $ "two" =~ number
string :: Text -> RE' Text
string :: Text -> RE' Text
string = (String -> Text) -> RE Char String -> RE' Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Text
T.pack (RE Char String -> RE' Text)
-> (Text -> RE Char String) -> Text -> RE' Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> RE Char String
forall a. Eq a => [a] -> RE a [a]
R.string (String -> RE Char String)
-> (Text -> String) -> Text -> RE Char String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack

-- | Match zero or more instances of the given expression, which are combined using
-- the given folding function.
--
-- 'Greediness' argument controls whether this regular expression should match
-- as many as possible ('Greedy') or as few as possible ('NonGreedy') instances
-- of the underlying expression.
reFoldl :: R.Greediness -> (b -> a -> b) -> b -> RE' a -> RE' b
reFoldl :: Greediness -> (b -> a -> b) -> b -> RE' a -> RE' b
reFoldl = Greediness -> (b -> a -> b) -> b -> RE' a -> RE' b
forall b a s. Greediness -> (b -> a -> b) -> b -> RE s a -> RE s b
R.reFoldl

-- | Match zero or more instances of the given expression, but as
-- few of them as possible (i.e. /non-greedily/). A greedy equivalent of 'few'
-- is 'many'.x
--
-- > >>> findFirstPrefix (few anySym  <* "b") "ababab"
-- > Just ("a","abab")
-- > >>> findFirstPrefix (many anySym  <* "b") "ababab"
-- > Just ("ababa","")
few :: RE' a -> RE' [a]
few :: RE' a -> RE' [a]
few = RE' a -> RE' [a]
forall s a. RE s a -> RE s [a]
R.few

-- | Return matched symbols as part of the return value
withMatched :: RE' a -> RE' (a, Text)
withMatched :: RE' a -> RE' (a, Text)
withMatched = ((a, String) -> (a, Text)) -> RE Char (a, String) -> RE' (a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((String -> Text) -> (a, String) -> (a, Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second String -> Text
T.pack) (RE Char (a, String) -> RE' (a, Text))
-> (RE' a -> RE Char (a, String)) -> RE' a -> RE' (a, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RE' a -> RE Char (a, String)
forall s a. RE s a -> RE s (a, [s])
R.withMatched

-- | @s =~ a = match a s@
(=~) :: Text -> RE' a -> Maybe a
=~ :: Text -> RE' a -> Maybe a
(=~) = (RE' a -> Text -> Maybe a) -> Text -> RE' a -> Maybe a
forall a b c. (a -> b -> c) -> b -> a -> c
flip RE' a -> Text -> Maybe a
forall a. RE' a -> Text -> Maybe a
match
infix 2 =~

-- | Attempt to match a 'Text' against the regular expression.
-- Note that the whole string (not just some part of it) should be matched.
--
-- > >>> match (sym 'a' <|> sym 'b') "a"
-- > Just 'a'
-- > >>> match (sym 'a' <|> sym 'b') "ab"
-- > Nothing
--
match :: RE' a -> Text -> Maybe a
match :: RE' a -> Text -> Maybe a
match = (RE' a -> String -> Maybe a) -> RE' a -> Text -> Maybe a
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe a
forall s a. RE s a -> [s] -> Maybe a
R.match

-- | Find a string prefix which is matched by the regular expression.
--
-- Of all matching prefixes, pick one using left bias (prefer the left part of
-- '<|>' to the right part) and greediness.
--
-- This is the match which a backtracking engine (such as Perl's one) would find
-- first.
--
-- If match is found, the rest of the input is also returned.
--
-- > >>> findFirstPrefix ("a" <|> "ab") "abc"
-- > Just ("a","bc")
-- > >>> findFirstPrefix ("ab" <|> "a") "abc"
-- > Just ("ab","c")
-- > >>> findFirstPrefix "bc" "abc"
-- > Nothing
findFirstPrefix :: RE' a -> Text -> Maybe (a, Text)
findFirstPrefix :: RE' a -> Text -> Maybe (a, Text)
findFirstPrefix = ((a, String) -> (a, Text)) -> Maybe (a, String) -> Maybe (a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a, String) -> (a, Text)
forall a. (a, String) -> (a, Text)
pairF (Maybe (a, String) -> Maybe (a, Text))
-> (RE' a -> Text -> Maybe (a, String))
-> RE' a
-> Text
-> Maybe (a, Text)
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.: (RE' a -> String -> Maybe (a, String))
-> RE' a -> Text -> Maybe (a, String)
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe (a, String)
forall s a. RE s a -> [s] -> Maybe (a, [s])
R.findFirstPrefix

-- | Find the longest string prefix which is matched by the regular expression.
--
-- Submatches are still determined using left bias and greediness, so this is
-- different from POSIX semantics.
--
-- If match is found, the rest of the input is also returned.
--
--
-- > >>> let keyword = "if"
-- > >>> let identifier = many $ psym isAlpha
-- > >>> let lexeme = (Left <$> keyword) <|> (Right <$> identifier)
-- > >>> findLongestPrefix lexeme "if foo"
-- > Just (Left "if"," foo")
-- > >>> findLongestPrefix lexeme "iffoo"
-- > Just (Right "iffoo","")
findLongestPrefix :: RE' a -> Text -> Maybe (a, Text)
findLongestPrefix :: RE' a -> Text -> Maybe (a, Text)
findLongestPrefix = ((a, String) -> (a, Text)) -> Maybe (a, String) -> Maybe (a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a, String) -> (a, Text)
forall a. (a, String) -> (a, Text)
pairF (Maybe (a, String) -> Maybe (a, Text))
-> (RE' a -> Text -> Maybe (a, String))
-> RE' a
-> Text
-> Maybe (a, Text)
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.: (RE' a -> String -> Maybe (a, String))
-> RE' a -> Text -> Maybe (a, String)
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe (a, String)
forall s a. RE s a -> [s] -> Maybe (a, [s])
R.findLongestPrefix

-- | Find the shortest prefix (analogous to 'findLongestPrefix')
findShortestPrefix :: RE' a -> Text -> Maybe (a, Text)
findShortestPrefix :: RE' a -> Text -> Maybe (a, Text)
findShortestPrefix = ((a, String) -> (a, Text)) -> Maybe (a, String) -> Maybe (a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (a, String) -> (a, Text)
forall a. (a, String) -> (a, Text)
pairF (Maybe (a, String) -> Maybe (a, Text))
-> (RE' a -> Text -> Maybe (a, String))
-> RE' a
-> Text
-> Maybe (a, Text)
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.: (RE' a -> String -> Maybe (a, String))
-> RE' a -> Text -> Maybe (a, String)
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe (a, String)
forall s a. RE s a -> [s] -> Maybe (a, [s])
R.findShortestPrefix

-- | Find the leftmost substring that is matched by the regular expression.
-- Otherwise behaves like 'findFirstPrefix'. Returns the result together with
-- the prefix and suffix of the string surrounding the match.
findFirstInfix :: RE' a -> Text -> Maybe (Text, a, Text)
findFirstInfix :: RE' a -> Text -> Maybe (Text, a, Text)
findFirstInfix = ((String, a, String) -> (Text, a, Text))
-> Maybe (String, a, String) -> Maybe (Text, a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String, a, String) -> (Text, a, Text)
forall a. (String, a, String) -> (Text, a, Text)
tripleF (Maybe (String, a, String) -> Maybe (Text, a, Text))
-> (RE' a -> Text -> Maybe (String, a, String))
-> RE' a
-> Text
-> Maybe (Text, a, Text)
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.: (RE' a -> String -> Maybe (String, a, String))
-> RE' a -> Text -> Maybe (String, a, String)
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe (String, a, String)
forall s a. RE s a -> [s] -> Maybe ([s], a, [s])
R.findFirstInfix

-- | Find the leftmost substring that is matched by the regular expression.
-- Otherwise behaves like 'findLongestPrefix'. Returns the result together with
-- the prefix and suffix of the string surrounding the match.
findLongestInfix :: RE' a -> Text -> Maybe (Text, a, Text)
findLongestInfix :: RE' a -> Text -> Maybe (Text, a, Text)
findLongestInfix = ((String, a, String) -> (Text, a, Text))
-> Maybe (String, a, String) -> Maybe (Text, a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String, a, String) -> (Text, a, Text)
forall a. (String, a, String) -> (Text, a, Text)
tripleF (Maybe (String, a, String) -> Maybe (Text, a, Text))
-> (RE' a -> Text -> Maybe (String, a, String))
-> RE' a
-> Text
-> Maybe (Text, a, Text)
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.: (RE' a -> String -> Maybe (String, a, String))
-> RE' a -> Text -> Maybe (String, a, String)
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe (String, a, String)
forall s a. RE s a -> [s] -> Maybe ([s], a, [s])
R.findLongestInfix

-- | Find the leftmost substring that is matched by the regular expression.
-- Otherwise behaves like 'findShortestPrefix'. Returns the result together with
-- the prefix and suffix of the string surrounding the match.
findShortestInfix :: RE' a -> Text -> Maybe (Text, a, Text)
findShortestInfix :: RE' a -> Text -> Maybe (Text, a, Text)
findShortestInfix = ((String, a, String) -> (Text, a, Text))
-> Maybe (String, a, String) -> Maybe (Text, a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String, a, String) -> (Text, a, Text)
forall a. (String, a, String) -> (Text, a, Text)
tripleF (Maybe (String, a, String) -> Maybe (Text, a, Text))
-> (RE' a -> Text -> Maybe (String, a, String))
-> RE' a
-> Text
-> Maybe (Text, a, Text)
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.: (RE' a -> String -> Maybe (String, a, String))
-> RE' a -> Text -> Maybe (String, a, String)
forall a b. (a -> String -> b) -> a -> Text -> b
reTextF RE' a -> String -> Maybe (String, a, String)
forall s a. RE s a -> [s] -> Maybe ([s], a, [s])
R.findShortestInfix

-- | Replace matches of regular expression with it's value.
--
-- > >>> replace ("!" <$ sym 'f' <* some (sym 'o')) "quuxfoofooooofoobarfobar"
-- > "quux!!!bar!bar"
replace :: RE' Text -> Text -> Text
replace :: RE' Text -> Text -> Text
replace RE' Text
r = String -> Text
go (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack
  where go :: String -> Text
        go :: String -> Text
go [] = Text
T.empty
        go ys :: String
ys@(Char
x:String
xs) = case RE' Text -> String -> Maybe (Text, String)
forall s a. RE s a -> [s] -> Maybe (a, [s])
R.findLongestPrefix RE' Text
r String
ys of
                         Maybe (Text, String)
Nothing              -> Char -> Text -> Text
T.cons Char
x (String -> Text
go String
xs)
                         Just (Text
prefix, String
rest)  -> Text
prefix Text -> Text -> Text
forall a. Monoid a => a -> a -> a
`mappend` String -> Text
go String
rest

-- Helpers

reTextF :: (a -> String -> b) -> (a -> Text -> b)
reTextF :: (a -> String -> b) -> a -> Text -> b
reTextF a -> String -> b
f a
a Text
s = a -> String -> b
f a
a (Text -> String
T.unpack Text
s)
{- INLINE reTextF -}

pairF :: (a, String) -> (a, Text)
pairF :: (a, String) -> (a, Text)
pairF (a
x, String
y) = (a
x, String -> Text
T.pack String
y)
{-# INLINE pairF #-}

tripleF :: (String, a, String) -> (Text, a, Text)
tripleF :: (String, a, String) -> (Text, a, Text)
tripleF (String
x, a
y, String
z) = (String -> Text
T.pack String
x, a
y, String -> Text
T.pack String
z)
{-# INLINE tripleF #-}

(.:) :: (c -> d) -> (a -> b -> c) -> (a -> b -> d)
c -> d
f .: :: (c -> d) -> (a -> b -> c) -> a -> b -> d
.: a -> b -> c
g = \a
a b
b -> c -> d
f (a -> b -> c
g a
a b
b)
{-# INLINE (.:) #-}