{-# LANGUAGE OverloadedStrings #-}

module Lexer
( space, lexeme, symbol
, number
, identifier
, charLiteral, stringLiteral
, surroundWith, parens, brackets, sqBrackets
) where

import Text.Megaparsec.Char ( space1, char, hexDigitChar, letterChar )
import qualified Text.Megaparsec.Char.Lexer as L
import Text.Megaparsec
import Parser.Util
import Data.Scientific (Scientific)
import Numeric (readHex)
import Data.Text (Text, cons)
import Data.Char (isAlphaNum)

-- | Parses whitespace.
space :: Parser ()
space = L.space space1 (L.skipLineComment "//") (L.skipBlockComment "/*" "*/")

-- | Wrapper for lexemes.
lexeme :: Parser a -> Parser a
lexeme = L.lexeme space

symbol :: Text -> Parser Text
symbol = L.symbol space

-- -- | Parse a number with an optional sign.
-- signed :: Num a => Parser a -> Parser a
-- signed = L.signed space

-- | Parse a number. This number can have arbitrary integer and fractional digits.
number :: Parser Scientific
number = lexeme L.scientific
-- TODO: parse binary and hexadecimal digits.

identifier :: Parser Text
identifier = lexeme $ cons <$> (letterChar <|> char '_') <*> takeWhileP (Just "letter") (\c -> isAlphaNum c || c == '_')

-- | Parse an escape sequence.
escapedChar :: Parser Char
escapedChar = char '\\' *> choice
  [ '"'  <$ char '"'   -- Escaped double quote.
  , '\'' <$ char '\''  -- Escaped single quote.
  , '\\' <$ char '\\'  -- Escaped backslash.
  , '\n' <$ char 'n'   -- Newline.
  , '\t' <$ char 't'   -- Tab.
  , '\r' <$ char 'r'   -- Carriage return.
  , '\f' <$ char 'f'   -- Form feed.
  , '\b' <$ char 'b'   -- Backspace.
  , '\v' <$ char 'v'   -- Vertical tab.
  , unicodeEscape      -- Unicode escape sequences.
  ]
  -- TODO: better error message (say "not a valid escape sequence" instead of "unexpected... expecting ...")

-- | Parse an escape sequence for unicode characters.
unicodeEscape :: Parser Char
unicodeEscape = check . readHex =<< (char 'u' *> count 4 hexDigitChar)
  where
    check [(n, "")] = return $ toEnum n
    check _         = fail "Invalid Unicode escape sequence"

-- | Parse a string literal.
stringLiteral :: Parser String
stringLiteral = char '"' *> manyTill (escapedChar <|> noneOf ['\\', '"'] <?> "character") (char '"')

-- | Parse a character literal.
charLiteral :: Parser Char
charLiteral = char '\'' *> (escapedChar <|> anySingleBut '\\' <?> "character") <* char '\''

surroundWith :: Char -> Char -> Parser a -> Parser a
surroundWith l r p = char l *> p <* char r

parens :: Parser a -> Parser a
parens = surroundWith '(' ')'

brackets :: Parser a -> Parser a
brackets = surroundWith '{' '}'

sqBrackets :: Parser a -> Parser a
sqBrackets = surroundWith '[' ']'