From 7c6c9427b5dcd7a917b4cd19a13527dbd74bfc94 Mon Sep 17 00:00:00 2001
From: Masahiro Sakai
Date: Sun, 4 Jan 2015 01:33:12 +0900
Subject: [PATCH] explicitly specify the encoding of source files as UTF-8

---
 src/Language/Haskell/Extract.hs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/Language/Haskell/Extract.hs b/src/Language/Haskell/Extract.hs
index 3e8958b..2a30a0b 100644
--- a/src/Language/Haskell/Extract.hs
+++ b/src/Language/Haskell/Extract.hs
@@ -6,11 +6,22 @@ module Language.Haskell.Extract (
 import Language.Haskell.TH
 import Text.Regex.Posix
 import Data.List
+import System.IO
 
 extractAllFunctions :: String -> Q [String]
 extractAllFunctions pattern = do
   loc <- location
-  file <- runIO $ readFile $ loc_filename loc
+  file <- runIO $ withFile (loc_filename loc) ReadMode $ \h -> do
+    -- Haskell programs are usually written in UTF-8, but the default file
+    -- encoding is determined by the locale and may differ (e.g. CP932 is used
+    -- in Japanese Windows environments).
+    -- To avoid the "hGetContents: invalid argument (invalid byte sequence)"
+    -- error in such cases, we explicitly set the file encoding to UTF-8.
+    hSetEncoding h =<< mkTextEncoding "UTF-8//IGNORE"
+    contents <- hGetContents h
+    -- hGetContents is lazy: force the whole file before withFile closes the
+    -- handle, so that no unread data is lost and the handle is not leaked.
+    length contents `seq` return contents
   return $ nub $ filter (=~pattern) $ map fst $ concat $ map lex $ lines file
 
 -- | Extract the names and functions from the module where this function is called.