{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections   #-}
{-# LANGUAGE ViewPatterns    #-}

-- | Description: Calculate differences between vectors.
--
-- This module implements a variation on the
-- <http://en.wikipedia.org/wiki/Wagner–Fischer_algorithm Wagner-Fischer>
-- algorithm to find the shortest sequences of operations which transforms
-- one vector of values into another.
module Data.Vector.Distance (
  -- * Types
  Params(..),
  ChangeMatrix(..),

  -- * Operations
  leastChanges,
  allChanges,

  -- * Example
  strParams,
) where

import           Control.Applicative
import           Control.Arrow       ((***))
import           Data.Function
import           Data.List           hiding (delete, insert)
import           Data.Maybe
import           Data.Monoid
import           Data.Vector         (Vector)
import qualified Data.Vector         as V

-- | Operations invoked by the Wagner-Fischer algorithm.
--
--   The parameters to this type are as follows:
--
--   * 'v' is the type of values being compared,
--   * 'o' is the type representing operations,
--   * 'c' is the type representing costs.
--
--   The chief restrictions on these type parameters is that the cost type 'c' 
--   must have instances of 'Monoid' and 'Ord'. A good default choice might be
--   the type @('Sum' 'Int')@.
data Params v o c = Params
    { forall v o c. Params v o c -> v -> v -> Bool
equivalent     :: v -> v -> Bool
    -- ^ Are two values equivalent?
    , forall v o c. Params v o c -> Int -> v -> o
delete         :: Int -> v -> o
    -- ^ Delete the element at an index.
    , forall v o c. Params v o c -> Int -> v -> o
insert         :: Int -> v -> o
    -- ^ Insert an element at an index.
    , forall v o c. Params v o c -> Int -> v -> v -> o
substitute     :: Int -> v -> v -> o
    -- ^ Substitute an element at an index.
    , forall v o c. Params v o c -> o -> c
cost           :: o -> c
    -- ^ Cost of a change.
    , forall v o c. Params v o c -> o -> Int
positionOffset :: o -> Int
    -- ^ Positions to advance after a change. E.g. @0@ for a deletion.
    }


-- | Matrix of optimal edit scripts and costs for all prefixes of two vectors.
--
--   This is a representation of the @n * m@ dynamic programming matrix
--   constructed by the algorithm.  The matrix is stored in a 'Vector' in
--   row-major format with an additional row and column corresponding to the
--   empty prefix of the source and destination 'Vectors'.
type ChangeMatrix o c = Vector (c, [o])

-- | /O(nm)./ Find the cost and optimal edit script to transform one 'Vector'
--   into another.
leastChanges
    :: (Monoid c, Ord c)
    => Params v o c
    -> Vector v -- ^ \"Source\" vector.
    -> Vector v -- ^ \"Destination" vector.
    -> (c, [o])
leastChanges :: forall c v o.
(Monoid c, Ord c) =>
Params v o c -> Vector v -> Vector v -> (c, [o])
leastChanges Params v o c
p Vector v
ss Vector v
tt = ([Maybe o] -> [o]) -> (c, [Maybe o]) -> (c, [o])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([Maybe o] -> [o]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe o] -> [o]) -> ([Maybe o] -> [Maybe o]) -> [Maybe o] -> [o]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe o] -> [Maybe o]
forall a. [a] -> [a]
reverse) ((c, [Maybe o]) -> (c, [o]))
-> (Vector (c, [Maybe o]) -> (c, [Maybe o]))
-> Vector (c, [Maybe o])
-> (c, [o])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (c, [Maybe o]) -> (c, [Maybe o])
forall a. Vector a -> a
V.last (Vector (c, [Maybe o]) -> (c, [o]))
-> Vector (c, [Maybe o]) -> (c, [o])
forall a b. (a -> b) -> a -> b
$ Params v o c -> Vector v -> Vector v -> Vector (c, [Maybe o])
forall c v o.
(Monoid c, Ord c) =>
Params v o c -> Vector v -> Vector v -> Vector (c, [Maybe o])
rawChanges Params v o c
p Vector v
ss Vector v
tt

-- | /O(nm)./ Calculate the complete matrix of edit scripts and costs between
--   two vectors.
allChanges
    :: (Monoid c, Ord c)
    => Params v o c
    -> Vector v -- ^ \"Source\" vector.
    -> Vector v -- ^ \"Destination" vector.
    -> ChangeMatrix o c
allChanges :: forall c v o.
(Monoid c, Ord c) =>
Params v o c -> Vector v -> Vector v -> ChangeMatrix o c
allChanges Params v o c
p Vector v
src Vector v
dst = ((c, [Maybe o]) -> (c, [o]))
-> Vector (c, [Maybe o]) -> Vector (c, [o])
forall a b. (a -> b) -> Vector a -> Vector b
V.map (([Maybe o] -> [o]) -> (c, [Maybe o]) -> (c, [o])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([Maybe o] -> [o]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe o] -> [o]) -> ([Maybe o] -> [Maybe o]) -> [Maybe o] -> [o]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe o] -> [Maybe o]
forall a. [a] -> [a]
reverse)) (Vector (c, [Maybe o]) -> Vector (c, [o]))
-> Vector (c, [Maybe o]) -> Vector (c, [o])
forall a b. (a -> b) -> a -> b
$ Params v o c -> Vector v -> Vector v -> Vector (c, [Maybe o])
forall c v o.
(Monoid c, Ord c) =>
Params v o c -> Vector v -> Vector v -> Vector (c, [Maybe o])
rawChanges Params v o c
p Vector v
src Vector v
dst

-- | /O(nm)./ Calculate the complete matrix of edit scripts and costs between
--   two vectors.
--
--   This is a fairly direct implementation of Wagner-Fischer algorithm using
--   the 'Vector' data-type. The 'ChangeMatrix' is constructed in a single-pass.
--
--   Note: The change matrix is \"raw\" in that the edit script in each cell is
--   in reverse order and uses 'Maybe' to allow for steps at which no change is
--   necessary.
rawChanges
    :: (Monoid c, Ord c)
    => Params v o c
    -> Vector v -- ^ \"Source\" vector.
    -> Vector v -- ^ \"Destination" vector.
    -> Vector (c, [Maybe o])
rawChanges :: forall c v o.
(Monoid c, Ord c) =>
Params v o c -> Vector v -> Vector v -> Vector (c, [Maybe o])
rawChanges p :: Params v o c
p@Params{v -> v -> Bool
o -> c
o -> Int
Int -> v -> o
Int -> v -> v -> o
positionOffset :: o -> Int
cost :: o -> c
substitute :: Int -> v -> v -> o
insert :: Int -> v -> o
delete :: Int -> v -> o
equivalent :: v -> v -> Bool
positionOffset :: forall v o c. Params v o c -> o -> Int
cost :: forall v o c. Params v o c -> o -> c
substitute :: forall v o c. Params v o c -> Int -> v -> v -> o
insert :: forall v o c. Params v o c -> Int -> v -> o
delete :: forall v o c. Params v o c -> Int -> v -> o
equivalent :: forall v o c. Params v o c -> v -> v -> Bool
..} Vector v
src Vector v
dst =
    let len_x :: Int
len_x = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector v -> Int
forall a. Vector a -> Int
V.length Vector v
dst
        len_y :: Int
len_y = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector v -> Int
forall a. Vector a -> Int
V.length Vector v
src
        len_n :: Int
len_n = Int
len_x Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
len_y
        ix :: Int -> Int -> Int
ix Int
x Int
y = (Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
len_y) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
y
        -- Get a cell from the 'ChangeMatrix'. It is an error to get a cell
        -- which hasn't been calculated yet!
        get :: Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
        get :: forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
m Int
x Int
y = (c, [Maybe o]) -> Maybe (c, [Maybe o]) -> (c, [Maybe o])
forall a. a -> Maybe a -> a
fromMaybe (String -> (c, [Maybe o])
forall a. HasCallStack => String -> a
error (String -> (c, [Maybe o])) -> String -> (c, [Maybe o])
forall a b. (a -> b) -> a -> b
$ String
"Unable to get " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> (Int, Int) -> String
forall a. Show a => a -> String
show (Int
x,Int
y) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" from change matrix") (Vector (c, [Maybe o])
m Vector (c, [Maybe o]) -> Int -> Maybe (c, [Maybe o])
forall a. Vector a -> Int -> Maybe a
V.!? (Int -> Int -> Int
ix Int
x Int
y))
        -- Calculate the position to be updated by the next edit in a script.
        position :: [Maybe o] -> Int
position = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> ([Maybe o] -> [Int]) -> [Maybe o] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe o -> Int) -> [Maybe o] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> (o -> Int) -> Maybe o -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int
1 o -> Int
positionOffset)
        -- Given a partially complete 'ChangeMatrix', compute the next cell.
        ctr :: Vector (c, [Maybe o]) -> (c, [Maybe o])
ctr Vector (c, [Maybe o])
v = case Vector (c, [Maybe o]) -> Int
forall a. Vector a -> Int
V.length Vector (c, [Maybe o])
v Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
len_y of
            -- Do nothing for "" ~> ""
            (        Int
0,         Int
0) -> (c
forall a. Monoid a => a
mempty, [Maybe o]
forall a. Monoid a => a
mempty)
            -- Delete everything in src for "..." ~> ""
            (        Int
0, Int -> Int
forall a. Enum a => a -> a
pred -> Int
y) ->
                let o :: o
o = Int -> v -> o
delete Int
0 (Vector v
src Vector v -> Int -> v
forall a. Vector a -> Int -> a
V.! Int
y)
                    (c
pc, [Maybe o]
po) = Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
v Int
0 Int
y
                in (o -> c
cost o
o c -> c -> c
forall a. Semigroup a => a -> a -> a
<> c
pc, o -> Maybe o
forall a. a -> Maybe a
Just o
o Maybe o -> [Maybe o] -> [Maybe o]
forall a. a -> [a] -> [a]
: [Maybe o]
po)
            -- Insert everything in dst for "" ~> "..."
            (Int -> Int
forall a. Enum a => a -> a
pred -> Int
x,         Int
0) ->
                let o :: o
o = Int -> v -> o
insert Int
x (v -> Maybe v -> v
forall a. a -> Maybe a -> a
fromMaybe (String -> v
forall a. HasCallStack => String -> a
error String
"NAH") (Maybe v -> v) -> Maybe v -> v
forall a b. (a -> b) -> a -> b
$ Vector v
dst Vector v -> Int -> Maybe v
forall a. Vector a -> Int -> Maybe a
V.!? Int
x)
                    (c
pc, [Maybe o]
po) = Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
v Int
x Int
0
                in (o -> c
cost o
o c -> c -> c
forall a. Semigroup a => a -> a -> a
<> c
pc, o -> Maybe o
forall a. a -> Maybe a
Just o
o Maybe o -> [Maybe o] -> [Maybe o]
forall a. a -> [a] -> [a]
: [Maybe o]
po)
            -- Compare options between src and dst for "..." ~> "..."
            (Int -> Int
forall a. Enum a => a -> a
pred -> Int
x, Int -> Int
forall a. Enum a => a -> a
pred -> Int
y) ->
                let s :: v
s = Vector v
src Vector v -> Int -> v
forall a. Vector a -> Int -> a
V.! Int
y
                    d :: v
d = Vector v
dst Vector v -> Int -> v
forall a. Vector a -> Int -> a
V.! Int
x
                    tl :: (c, [Maybe o])
tl   = Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
v (Int
x)   (Int
y)
                    top :: (c, [Maybe o])
top  = Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
v (Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
y)
                    left :: (c, [Maybe o])
left = Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
v (Int
x)   (Int
yInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                in if v
s v -> v -> Bool
`equivalent` v
d
                    then (Maybe o
forall a. Maybe a
NothingMaybe o -> [Maybe o] -> [Maybe o]
forall a. a -> [a] -> [a]
:) ([Maybe o] -> [Maybe o]) -> (c, [Maybe o]) -> (c, [Maybe o])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
forall c o. Vector (c, [Maybe o]) -> Int -> Int -> (c, [Maybe o])
get Vector (c, [Maybe o])
v Int
x Int
y
                    else ((c, [Maybe o]) -> (c, [Maybe o]) -> Ordering)
-> [(c, [Maybe o])] -> (c, [Maybe o])
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy (c -> c -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (c -> c -> Ordering)
-> ((c, [Maybe o]) -> c)
-> (c, [Maybe o])
-> (c, [Maybe o])
-> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (c, [Maybe o]) -> c
forall a b. (a, b) -> a
fst)
                        -- Option 1: perform a deletion.
                        [ let c :: o
c = Int -> v -> o
delete ([Maybe o] -> Int
position ([Maybe o] -> Int)
-> ((c, [Maybe o]) -> [Maybe o]) -> (c, [Maybe o]) -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (c, [Maybe o]) -> [Maybe o]
forall a b. (a, b) -> b
snd ((c, [Maybe o]) -> Int) -> (c, [Maybe o]) -> Int
forall a b. (a -> b) -> a -> b
$ (c, [Maybe o])
top) v
s
                          in (o -> c
cost o
c c -> c -> c
forall a. Semigroup a => a -> a -> a
<>) (c -> c)
-> ([Maybe o] -> [Maybe o]) -> (c, [Maybe o]) -> (c, [Maybe o])
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** (o -> Maybe o
forall a. a -> Maybe a
Just o
c Maybe o -> [Maybe o] -> [Maybe o]
forall a. a -> [a] -> [a]
:) ((c, [Maybe o]) -> (c, [Maybe o]))
-> (c, [Maybe o]) -> (c, [Maybe o])
forall a b. (a -> b) -> a -> b
$ (c, [Maybe o])
top
                        -- Option 2: perform an insertion.
                        , let c :: o
c = Int -> v -> o
insert ([Maybe o] -> Int
position ([Maybe o] -> Int)
-> ((c, [Maybe o]) -> [Maybe o]) -> (c, [Maybe o]) -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (c, [Maybe o]) -> [Maybe o]
forall a b. (a, b) -> b
snd ((c, [Maybe o]) -> Int) -> (c, [Maybe o]) -> Int
forall a b. (a -> b) -> a -> b
$ (c, [Maybe o])
left) v
d
                          in (o -> c
cost o
c c -> c -> c
forall a. Semigroup a => a -> a -> a
<>) (c -> c)
-> ([Maybe o] -> [Maybe o]) -> (c, [Maybe o]) -> (c, [Maybe o])
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** (o -> Maybe o
forall a. a -> Maybe a
Just o
c Maybe o -> [Maybe o] -> [Maybe o]
forall a. a -> [a] -> [a]
:) ((c, [Maybe o]) -> (c, [Maybe o]))
-> (c, [Maybe o]) -> (c, [Maybe o])
forall a b. (a -> b) -> a -> b
$ (c, [Maybe o])
left
                        -- Option 3: perform a substitution.
                        , let c :: o
c = Int -> v -> v -> o
substitute ([Maybe o] -> Int
position ([Maybe o] -> Int)
-> ((c, [Maybe o]) -> [Maybe o]) -> (c, [Maybe o]) -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (c, [Maybe o]) -> [Maybe o]
forall a b. (a, b) -> b
snd ((c, [Maybe o]) -> Int) -> (c, [Maybe o]) -> Int
forall a b. (a -> b) -> a -> b
$ (c, [Maybe o])
tl) v
s v
d
                          in (o -> c
cost o
c c -> c -> c
forall a. Semigroup a => a -> a -> a
<>) (c -> c)
-> ([Maybe o] -> [Maybe o]) -> (c, [Maybe o]) -> (c, [Maybe o])
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** (o -> Maybe o
forall a. a -> Maybe a
Just o
c Maybe o -> [Maybe o] -> [Maybe o]
forall a. a -> [a] -> [a]
:) ((c, [Maybe o]) -> (c, [Maybe o]))
-> (c, [Maybe o]) -> (c, [Maybe o])
forall a b. (a -> b) -> a -> b
$ (c, [Maybe o])
tl
                        ]
    in Int
-> (Vector (c, [Maybe o]) -> (c, [Maybe o]))
-> Vector (c, [Maybe o])
forall a. Int -> (Vector a -> a) -> Vector a
V.constructN Int
len_n Vector (c, [Maybe o]) -> (c, [Maybe o])
ctr

-- | Example 'Params' to compare @('Vector' 'Char')@ values.
--
--   The algorithm will produce edit distances in terms of @('Sum' 'Int')@ and
--   edit scripts containing @(String, Int, Char)@ values.
--   
--   The first component of each operation is either @"delete"@, @"insert"@, or
--   @"replace"@.
strParams :: Params Char (String, Int, Char) (Sum Int)
strParams :: Params Char (String, Int, Char) (Sum Int)
strParams = Params :: forall v o c.
(v -> v -> Bool)
-> (Int -> v -> o)
-> (Int -> v -> o)
-> (Int -> v -> v -> o)
-> (o -> c)
-> (o -> Int)
-> Params v o c
Params{Char -> Char -> Bool
Int -> Char -> (String, Int, Char)
Int -> Char -> Char -> (String, Int, Char)
(String, Int, Char) -> Int
(String, Int, Char) -> Sum Int
forall {b} {c}. b -> c -> (String, b, c)
forall {a} {p}. Num a => p -> Sum a
forall {b} {p} {c}. b -> p -> c -> (String, b, c)
forall {p} {b} {c}. Num p => (String, b, c) -> p
positionOffset :: forall {p} {b} {c}. Num p => (String, b, c) -> p
cost :: forall {a} {p}. Num a => p -> Sum a
substitute :: forall {b} {p} {c}. b -> p -> c -> (String, b, c)
insert :: forall {b} {c}. b -> c -> (String, b, c)
delete :: forall {b} {c}. b -> c -> (String, b, c)
equivalent :: Char -> Char -> Bool
positionOffset :: (String, Int, Char) -> Int
cost :: (String, Int, Char) -> Sum Int
substitute :: Int -> Char -> Char -> (String, Int, Char)
insert :: Int -> Char -> (String, Int, Char)
delete :: Int -> Char -> (String, Int, Char)
equivalent :: Char -> Char -> Bool
..}
  where
    equivalent :: Char -> Char -> Bool
equivalent = Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
(==)
    delete :: b -> c -> (String, b, c)
delete     b
i c
c    = (String
"delete", b
i, c
c)
    insert :: b -> c -> (String, b, c)
insert     b
i c
c    = (String
"insert", b
i, c
c)
    substitute :: b -> p -> c -> (String, b, c)
substitute b
i p
c c
c' = (String
"replace", b
i, c
c')
    cost :: p -> Sum a
cost p
_ = a -> Sum a
forall a. a -> Sum a
Sum a
1
    positionOffset :: (String, b, c) -> p
positionOffset (String
"delete", b
_, c
_) = p
0
    positionOffset (String, b, c)
_ = p
1