Branimir Maksimovic | 24 Mar 16:08 2013
Picon

my take at knucleotide

Hi, I have tried to implement knucleotide benchmark program this time:

Implementation is shorter (uses hashtable from hashtables package),
but two time slower then current Haskell entry ( which is too low level
for my taste :)).
What is interesting is that if I try to place Int64 as a key to
hash table, performance is even slower.
Strange that dropping and taking from bytestring would be
faster than packing string in 64 bit int and directly indexing.

If someone can see something that can bring performance on par
with current haskell entry , I would post it , otherwise no point,
except that program is shorter and not low level.

{-# Language BangPatterns #-}
--
-- The Computer Language Benchmarks Game
-- http://benchmarksgame.alioth.debian.org/
--
-- Contributed by Branimir Maksimovic
--
import Data.Char
import Data.List
import Data.IORef
import qualified Data.HashTable.IO as H
import qualified Data.ByteString.Char8 as S
import Control.Concurrent
import Text.Printf

main = do
    s <- S.getContents
    let content = (S.map toUpper . S.concat . tail .
                  dropWhile (\l->not $ S.isPrefixOf (S.pack ">THREE") l) .
                  S.lines) s
    mapM_ (execute content) actions

data Actions = I Int | S String
actions = [I 1,I 2,
           S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]
execute content (I i) = writeFrequencies content i
execute content (S s) = writeCount content s

writeFrequencies input size = do
    ht <- tcalculate input size
    lst <- H.foldM (\lst (k,v)->do 
        v' <- readIORef v
        return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht
    let sum = fromIntegral ((S.length input) + 1 - size)
    mapM_ (\(k,v)-> do
        printf "%s %.3f\n" 
            (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lst
    putChar '\n'

writeCount input string = do
    let size = length string
    ht <- tcalculate input size
    res <- H.lookup ht (S.pack string)
    case res of 
        Nothing -> putStrLn $ string ++ " not found..."
        Just v -> do
            r <- readIORef v
            printf "%d\t%s\n" r (string::String)

tcalculate input size = do
    let 
        l = [0..7]
        actions = map (\i -> (calculate input i size (length l))) l
    vars <- mapM (\action -> do
                    var <- newEmptyMVar
                    forkIO $ do
                        answer <- action
                        putMVar var answer
                    return var) actions
    result <- newTable
    results <- mapM takeMVar vars
    mapM_ (\ht -> H.foldM (\lst (k,v) -> do 
                            res <- H.lookup lst k
                            case res of
                                Nothing -> do
                                    r1 <- readIORef v
                                    r2 <- newIORef r1
                                    H.insert lst k r2
                                Just v1 -> do
                                    r1 <- readIORef v1
                                    r2 <- readIORef v
                                    writeIORef v1 (r1+r2)
                            return lst) result ht) results
    return result
    
calculate input beg size incr = do
    ht <- newTable
    let
        calculate' :: S.ByteString -> Int -> IO HashTable
        calculate' str i 
         | i >= ((S.length input)+1 - size) = return ht
         | otherwise = do
            res <- H.lookup ht k
            case res of
                Nothing -> do
                    !r <- newIORef 1
                    H.insert ht k r
                Just v -> do
                    !r <- readIORef v
                    writeIORef v (r+1)
            calculate' (S.drop incr str) (i+incr)
                where k = S.take size str
    calculate' (S.drop beg input) beg

type HashTable = H.BasicHashTable S.ByteString (IORef Int) 
newTable :: IO HashTable
newTable = H.new

_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe <at> haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe
Gregory Collins | 24 Mar 20:12 2013
Picon

Re: my take at knucleotide

What happens to performance if you compile and link with "cabal install --constraint='hashable < 1.2'" ?

G


On Sun, Mar 24, 2013 at 4:08 PM, Branimir Maksimovic <bmaxa <at> hotmail.com> wrote:
Hi, I have tried to implement knucleotide benchmark program this time:

Implementation is shorter (uses hashtable from hashtables package),
but two time slower then current Haskell entry ( which is too low level
for my taste :)).
What is interesting is that if I try to place Int64 as a key to
hash table, performance is even slower.
Strange that dropping and taking from bytestring would be
faster than packing string in 64 bit int and directly indexing.

If someone can see something that can bring performance on par
with current haskell entry , I would post it , otherwise no point,
except that program is shorter and not low level.

{-# Language BangPatterns #-}
--
-- The Computer Language Benchmarks Game
--
-- Contributed by Branimir Maksimovic
--
import Data.Char
import Data.List
import Data.IORef
import qualified Data.HashTable.IO as H
import qualified Data.ByteString.Char8 as S
import Control.Concurrent
import Text.Printf

main = do
    s <- S.getContents
    let content = (S.map toUpper . S.concat . tail .
                  dropWhile (\l->not $ S.isPrefixOf (S.pack ">THREE") l) .
                  S.lines) s
    mapM_ (execute content) actions

data Actions = I Int | S String
actions = [I 1,I 2,
           S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]
execute content (I i) = writeFrequencies content i
execute content (S s) = writeCount content s

writeFrequencies input size = do
    ht <- tcalculate input size
    lst <- H.foldM (\lst (k,v)->do 
        v' <- readIORef v
        return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht
    let sum = fromIntegral ((S.length input) + 1 - size)
    mapM_ (\(k,v)-> do
        printf "%s %.3f\n" 
            (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lst
    putChar '\n'

writeCount input string = do
    let size = length string
    ht <- tcalculate input size
    res <- H.lookup ht (S.pack string)
    case res of 
        Nothing -> putStrLn $ string ++ " not found..."
        Just v -> do
            r <- readIORef v
            printf "%d\t%s\n" r (string::String)

tcalculate input size = do
    let 
        l = [0..7]
        actions = map (\i -> (calculate input i size (length l))) l
    vars <- mapM (\action -> do
                    var <- newEmptyMVar
                    forkIO $ do
                        answer <- action
                        putMVar var answer
                    return var) actions
    result <- newTable
    results <- mapM takeMVar vars
    mapM_ (\ht -> H.foldM (\lst (k,v) -> do 
                            res <- H.lookup lst k
                            case res of
                                Nothing -> do
                                    r1 <- readIORef v
                                    r2 <- newIORef r1
                                    H.insert lst k r2
                                Just v1 -> do
                                    r1 <- readIORef v1
                                    r2 <- readIORef v
                                    writeIORef v1 (r1+r2)
                            return lst) result ht) results
    return result
    
calculate input beg size incr = do
    ht <- newTable
    let
        calculate' :: S.ByteString -> Int -> IO HashTable
        calculate' str i 
         | i >= ((S.length input)+1 - size) = return ht
         | otherwise = do
            res <- H.lookup ht k
            case res of
                Nothing -> do
                    !r <- newIORef 1
                    H.insert ht k r
                Just v -> do
                    !r <- readIORef v
                    writeIORef v (r+1)
            calculate' (S.drop incr str) (i+incr)
                where k = S.take size str
    calculate' (S.drop beg input) beg

type HashTable = H.BasicHashTable S.ByteString (IORef Int) 
newTable :: IO HashTable
newTable = H.new


_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe <at> haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe




--
Gregory Collins <greg <at> gregorycollins.net>
_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe <at> haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe
Branimir Maksimovic | 24 Mar 20:39 2013
Picon

Re: my take at knucleotide

I have placed constraint on version of hashable, time is exactly same.
bmaxa <at> maxa:~/shootout/knucleotide$ cabal list hashable
* hashable
    Synopsis: A class for types that can be converted to a hash value
    Default available version: 1.2.0.5
    Installed versions: 1.1.2.5
    Homepage: http://github.com/tibbe/hashable
    License:  BSD3


Date: Sun, 24 Mar 2013 20:12:57 +0100
Subject: Re: [Haskell-cafe] my take at knucleotide
From: greg <at> gregorycollins.net
To: bmaxa <at> hotmail.com
CC: haskell-cafe <at> haskell.org

What happens to performance if you compile and link with "cabal install --constraint='hashable < 1.2'" ?

G



_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe <at> haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe
Branimir Maksimovic | 26 Mar 16:51 2013
Picon

Re: my take at knucleotide

Finally, I have made it ;)
Trick was in more threads . For some reason if I run 64 (sweet spot) threads program runs
faster both with -threaded and without ;)
Other trick is that I don't convert to uppercase (shaves second) rather pack nucleotide
in 64 bit int.
Program runs 30% faster multithreaded (scales better) than current entry, and consumes 50% less memory,
and is shorter.
If someone can see some improvement please post, otherwise I will contribute this program.


{-# Language BangPatterns #-}
--
-- The Computer Language Benchmarks Game
-- http://benchmarksgame.alioth.debian.org/
--
-- Contributed by Branimir Maksimovic
--
import Data.Bits
import Data.Char
import Data.Int
import Data.List
import Data.IORef
import Data.Array.Unboxed
import Data.Array.Base
import qualified Data.HashTable.IO as H
import Data.Hashable
import qualified Data.ByteString.Char8 as S
import Control.Concurrent
import Text.Printf

main = do
    s <- S.getContents
    let
        (_,subs) = S.breakSubstring (S.pack ">THREE") s
        content = (S.filter ((/=) '\n') . 
                   S.dropWhile ((/=) '\n')) subs 
    mapM_ (execute content) actions

                    
data Actions = I Int | S String
actions = [I 1,I 2,
           S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]
execute content (I i) = writeFrequencies content i
execute content (S s) = writeCount content s

writeFrequencies :: S.ByteString -> Int -> IO ()
writeFrequencies input size = do
    ht <- tcalculate input size
    lst <- H.foldM (\lst (k,v)->do 
        v' <- readIORef v
        return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht
    let sum = fromIntegral ((S.length input) + 1 - size)
    mapM_ (\(k,v)-> do
        printf "%s %.3f\n" 
            (toString k) ((100 * (fromIntegral v)/sum)::Double)) lst
    putChar '\n'

writeCount :: S.ByteString -> String -> IO ()
writeCount input string = do
    let size = length string
    ht <- tcalculate input size
    let k = T (toNum (S.pack string) 0 size) size
    res <- H.lookup ht k
    case res of 
        Nothing -> putStrLn $ string ++ " not found..."
        Just v -> do
            r <- readIORef v
            printf "%d\t%s\n" r string

tcalculate :: S.ByteString -> Int -> IO HM
tcalculate input size = do
    let 
        l = [0..63]
        actions = map (\i -> (calculate input i size (length l))) l
    vars <- mapM (\action -> do
                    var <- newEmptyMVar
                    forkIO $ do
                        answer <- action
                        putMVar var answer
                    return var) actions
    result <- newTable :: IO HM
    results <- mapM takeMVar vars
    mapM_ (\ht -> H.foldM (\lst (k,v) -> do 
                            res <- H.lookup lst k
                            case res of
                                Nothing -> do
                                    r1 <- readIORef v
                                    r2 <- newIORef r1
                                    H.insert lst k r2
                                Just v1 -> do
                                    r1 <- readIORef v1
                                    r2 <- readIORef v
                                    writeIORef v1 (r1+r2)
                            return lst) result ht) results
    return result

calculate :: S.ByteString -> Int -> Int -> Int -> IO HM    
calculate input beg size incr = do
    !ht <- newTable :: IO HM
    let
        calculate' i 
         | i >= ((S.length input)+1 - size) = return ht
         | otherwise = do
            res <- H.lookup ht k
            case res of
                Nothing -> do
                    !r <- newIORef 1
                    H.insert ht k r
                Just v -> do
                    !r <- readIORef v
                    writeIORef v (r+1)
            calculate' (i+incr)
                where k = T (toNum input i size) size
    calculate' beg

toNum :: S.ByteString -> Int -> Int -> Int64
toNum s beg size = toNum' 0 size
    where
        toNum' v 0 = v
        toNum' v i = toNum' ((v `shiftL` 2) .|. 
                    (toNumA `unsafeAt` (ord (S.index s (beg+i-1))))) (i-1)

toString :: T -> String
toString (T v s) = toString' v s
    where
        toString' v 0 = []
        toString' v i = case v.&.3 of
                        0 -> 'A'
                        1 -> 'C'
                        2 -> 'T'
                        3 -> 'G'
                      : toString' (v `shiftR` 2) (i-1)

toNumA :: UArray Int Int64
toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3),
            (ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)]

data T = T !Int64 !Int
instance Eq T where
    (T a _) == (T b _) = a == b
instance Hashable T where
    hashWithSalt _ (T a _) = fromIntegral a

type HM = H.BasicHashTable T (IORef Int)
newTable = H.new


Date: Sun, 24 Mar 2013 20:12:57 +0100
Subject: Re: [Haskell-cafe] my take at knucleotide
From: greg <at> gregorycollins.net
To: bmaxa <at> hotmail.com
CC: haskell-cafe <at> haskell.org

What happens to performance if you compile and link with "cabal install --constraint='hashable < 1.2'" ?

G




_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe <at> haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe
_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe <at> haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe
Branimir Maksimovic | 27 Mar 12:16 2013
Picon

Re: my take at knucleotide

I have posted this version.<!-- .ExternalClass .ecxhmmessage P { padding:0px; } .ExternalClass body.ecxhmmessage { font-size:12pt; font-family:Calibri; } -->
Mad home grown HashMap and replaced IOref with Ptr.
This made program twice as fast as current entry.

{-# Language BangPatterns #-}
--
-- The Computer Language Benchmarks Game
-- http://benchmarksgame.alioth.debian.org/
--
-- Contributed by Branimir Maksimovic
--
import Data.Bits
import Data.Char
import Data.Int
import Data.List
import Data.Array.Base
import Data.Array.Unboxed
import Data.Array.IO
import qualified Data.ByteString.Char8 as S
import Foreign.Ptr
import Foreign.Storable
import Foreign.Marshal.Alloc
import Control.Concurrent
import Text.Printf

main = do
    let skip = do
            l <- S.getLine
            if S.isPrefixOf (S.pack ">THREE") l
                then return ()
                else skip
    skip
    s <- S.getContents
    let content = S.filter ((/=) '\n') s;
    mapM_ (execute content) actions
    
data Actions = I Int | S String
actions = [I 1,I 2,
           S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]
execute content (I i) = writeFrequencies content i
execute content (S s) = writeCount content s

writeFrequencies :: S.ByteString -> Int -> IO ()
writeFrequencies input size = do
    ht <- tcalculate input size
    lst <- Main.foldM (\lst (k,v)->do 
        v' <- peek v
        return $ (k,v'):lst) [] ht
    let sorted = sortBy (\(_,x) (_,y) -> y `compare` x) lst
        sum = fromIntegral ((S.length input) + 1 - size)
    mapM_ (\(k,v)-> do
        printf "%s %.3f\n" 
            (toString k) ((100 * (fromIntegral v)/sum)::Double)) sorted
    putChar '\n'

writeCount :: S.ByteString -> String -> IO ()
writeCount input string = do
    let size = length string
        k = T (toNum (S.pack string) 0 size) size
    ht <- tcalculate input size
    res <- Main.lookup ht k
    case res of 
        Nothing -> putStrLn $ string ++ " not found..."
        Just v -> do
            r <- peek v
            printf "%d\t%s\n" r string

tcalculate :: S.ByteString -> Int -> IO HM
tcalculate input size = do
    let 
        l = [0..63]
        actions = map (\i -> (calculate input i size (length l))) l
    vars <- mapM (\action -> do
                    var <- newEmptyMVar
                    forkIO $ do
                        answer <- action
                        putMVar var answer
                    return var) actions
    result <- newTable :: IO HM
    results <- mapM takeMVar vars
    mapM_ (\ht -> Main.foldM (\lst (k,v) -> do 
                            res <- Main.lookup lst k
                            case res of
                                Nothing -> do
                                    r1 <- peek v
                                    r2 <- malloc
                                    poke r2 r1
                                    Main.insert lst k r2
                                Just v1 -> do
                                    r1 <- peek v1
                                    r2 <- peek v
                                    poke v1 (r1+r2)
                            return lst) result ht) results
    return result

calculate :: S.ByteString -> Int -> Int -> Int -> IO HM    
calculate input beg size incr = do
    !ht <- newTable :: IO HM
    let
        calculate' i 
         | i >= ((S.length input)+1 - size) = return ht
         | otherwise = do
            let k =  T (toNum input i size) size
            res <- Main.lookup ht k
            case res of
                Nothing -> do
                    !r <- malloc
                    poke r 1
                    Main.insert ht k r
                Just v -> do 
                    !r <- peek v
                    poke v (r+1)
            calculate' (i+incr)
    calculate' beg

toNum :: S.ByteString -> Int -> Int -> Int64
toNum s beg size = toNum' 0 size
    where
        toNum' v 0 = v
        toNum' v i = toNum' ((v `shiftL` 2) .|. 
                    (toNumA `unsafeAt` (ord (S.index s (beg+i-1))))) (i-1)

toString :: T -> String
toString (T v s) = toString' v s
    where
        toString' v 0 = []
        toString' v i = case v.&.3 of
                        0 -> 'A'
                        1 -> 'C'
                        2 -> 'T'
                        3 -> 'G'
                      : toString' (v `shiftR` 2) (i-1)

toNumA :: UArray Int Int64
toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3),
            (ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)]

data T = T !Int64 !Int
instance Eq T where
    (T a _) == (T b _) = a == b
class Hash h where
    hash :: h -> Int64
instance Hash T where
    hash (T a _) = a

type HM = HashMap T (Ptr Int)
data HashMap k v = HashMap !(IOArray Int64 [(k,v)])
tsz = 4096
newTable :: IO (HashMap k v)
newTable = do
    !array <- newArray (0,(tsz-1)) []
    return $ HashMap array

lookup :: (Eq k, Hash k)=>HashMap k v -> k -> IO (Maybe v)
lookup (HashMap a) k = do
    let h = hash k
    !lst <- readArray a (h .&. (tsz-1))
    let
        loop [] = return Nothing
        loop ((!k',!v):xs) 
            | k /= k' = loop xs
            | otherwise = return $ Just v
    loop lst

insert :: (Eq k, Hash k)=>HashMap k v -> k -> v -> IO ()
insert (HashMap a) k v = do
    let h = hash k
    !lst <- readArray a (h .&. (tsz-1))
    writeArray a (h .&. (tsz-1)) ((k,v):lst)

foldM :: ( a -> (b,c) -> IO a) -> a -> HashMap b c -> IO a
foldM f s (HashMap a) = do
    let 
        loop 0 s' = return s'
        loop i s' = do
            !lst <- readArray a (i-1)
            let 
                loop' [] s' = return s'
                loop' (x:xs) s' = do
                    !s'' <- f s' x
                    loop' xs s''
            !s'' <- loop' lst s'
            loop (i-1) s''
    loop tsz s

_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe <at> haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Gmane