Skip to content

Commit e0342a8

Browse files
committed
Add module Text.Pandoc.Format.Input
A new module Text.Pandoc.Format.Input is added and exposed to library users. Types supported as input format can be represented as value of the *InputFormat* type.
1 parent 41af476 commit e0342a8

File tree

6 files changed

+308
-99
lines changed

6 files changed

+308
-99
lines changed

pandoc.cabal

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,9 @@ library
663663
Text.Pandoc.Filter.Environment,
664664
Text.Pandoc.Filter.JSON,
665665
Text.Pandoc.Filter.Lua,
666+
Text.Pandoc.Format.Extensions,
667+
Text.Pandoc.Format.Input,
668+
Text.Pandoc.Format.Flavored,
666669
Text.Pandoc.Parsing.Capabilities,
667670
Text.Pandoc.Parsing.Citations,
668671
Text.Pandoc.Parsing.General,

src/Text/Pandoc/App.hs

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ import System.IO (nativeNewline, stdout)
4949
import qualified System.IO as IO (Newline (..))
5050
import Text.Pandoc
5151
import Text.Pandoc.Builder (setMeta)
52+
import qualified Text.Pandoc.Format.Input as Input
5253
import Text.Pandoc.MediaBag (mediaItems)
5354
import Text.Pandoc.MIME (getCharset, MimeType)
5455
import Text.Pandoc.Image (svgToPng)
55-
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
5656
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
5757
IpynbOutput (..))
5858
import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
@@ -116,18 +116,19 @@ convertWithOpts opts = do
116116
setOutputFile (optOutputFile opts)
117117

118118
-- assign reader and writer based on options and filenames
119-
readerName <- case optFrom opts of
120-
Just f -> return f
121-
Nothing -> case formatFromFilePaths sources of
122-
Just f' -> return f'
123-
Nothing | sources == ["-"] -> return "markdown"
124-
| any (isURI . T.pack) sources -> return "html"
125-
| otherwise -> do
126-
report $ CouldNotDeduceFormat
127-
(map (T.pack . takeExtension) sources) "markdown"
128-
return "markdown"
129-
130-
let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
119+
readerFormat <- case optFrom opts of
120+
Just spec -> Input.flavoredFromSpec spec
121+
Nothing ->
122+
let defFlavor = return . Input.defaultFlavor
123+
in case Input.flavoredFormatFromFilePaths sources of
124+
Just f' -> return f'
125+
Nothing
126+
| sources == ["-"] -> defFlavor Input.Markdown
127+
| any (isURI . T.pack) sources -> defFlavor Input.HTML
128+
| otherwise -> do
129+
report $ CouldNotDeduceFormat
130+
(map (T.pack . takeExtension) sources) "markdown"
131+
defFlavor Input.Markdown
131132

132133
let makeSandboxed pureReader =
133134
let files = maybe id (:) (optReferenceDoc opts) .
@@ -142,14 +143,13 @@ convertWithOpts opts = do
142143
ByteStringReader r
143144
-> ByteStringReader $ \o t -> sandbox files (r o t)
144145

145-
(reader, readerExts) <-
146-
if ".lua" `T.isSuffixOf` readerName
147-
then return (TextReader (readCustom (T.unpack readerName)), mempty)
148-
else if optSandbox opts
149-
then case runPure (getReader readerName) of
150-
Left e -> throwError e
151-
Right (r, rexts) -> return (makeSandboxed r, rexts)
152-
else getReader readerName
146+
(reader, readerExts) <- case readerFormat of
147+
Input.CustomFormat luaFilename ->
148+
return (TextReader (readCustom luaFilename), mempty)
149+
Input.KnownFormat infrmt exts ->
150+
if optSandbox opts
151+
then return (makeSandboxed (readerForFormat infrmt), exts)
152+
else return (readerForFormat infrmt, exts)
153153

154154
outputSettings <- optToOutputSettings opts
155155
let format = outputFormat outputSettings
@@ -169,7 +169,7 @@ convertWithOpts opts = do
169169
pdfOutput ||
170170
bibOutput
171171

172-
when (pdfOutput && readerNameBase == "latex") $
172+
when (pdfOutput && readerFormat `Input.hasBase` Input.LaTeX) $
173173
case optInputFiles opts of
174174
Just (inputFile:_) -> report $ UnusualConversion $ T.pack $
175175
"to convert a .tex file to PDF, you get better results by using pdflatex "
@@ -222,7 +222,7 @@ convertWithOpts opts = do
222222
-- If format is markdown or commonmark, use the enabled extensions,
223223
-- otherwise treat metadata as pandoc markdown (see #7926, #6832)
224224
let readerOptsMeta =
225-
if readerNameBase == "markdown" || readerNameBase == "commonmark"
225+
if readerFormat `Input.hasBaseIn` [Input.Markdown, Input.CommonMark]
226226
then readerOpts
227227
else readerOpts{ readerExtensions = pandocExtensions }
228228
mconcat <$> mapM
@@ -243,7 +243,7 @@ convertWithOpts opts = do
243243
then (eastAsianLineBreakFilter :)
244244
else id) .
245245
(case optIpynbOutput opts of
246-
_ | readerNameBase /= "ipynb" -> id
246+
_ | not (readerFormat `Input.hasBase` Input.Ipynb) -> id
247247
IpynbOutputAll -> id
248248
IpynbOutputNone -> (filterIpynbOutput Nothing :)
249249
IpynbOutputBest -> (filterIpynbOutput (Just $
@@ -257,14 +257,13 @@ convertWithOpts opts = do
257257
$ []
258258

259259
let convertTabs = tabFilter (if optPreserveTabs opts ||
260-
readerNameBase == "t2t" ||
261-
readerNameBase == "man" ||
262-
readerNameBase == "tsv"
260+
readerFormat `Input.hasBaseIn`
261+
[Input.T2T, Input.Man, Input.TSV]
263262
then 0
264263
else optTabStop opts)
265264

266265

267-
when (readerNameBase == "markdown_github" ||
266+
when (readerFormat `Input.hasBase` Input.Markdown_GitHub ||
268267
writerNameBase == "markdown_github") $
269268
report $ Deprecated "markdown_github" "Use gfm instead."
270269

@@ -293,7 +292,7 @@ convertWithOpts opts = do
293292

294293
doc <- (case reader of
295294
TextReader r
296-
| readerNameBase == "json" ->
295+
| readerFormat `Input.hasBase` Input.JSON ->
297296
mconcat <$>
298297
mapM (inputToText convertTabs
299298
>=> r readerOpts . (:[])) inputs
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{- |
2+
Module : Text.Pandoc.Extensions
3+
Copyright : © 2012-2022 John MacFarlane
4+
License : GPL-2.0-or-later
5+
Maintainer : John MacFarlane <[email protected]>
6+
7+
Data structures and functions for representing markup extensions.
8+
-}
9+
module Text.Pandoc.Format.Extensions
10+
( module Text.Pandoc.Extensions )
11+
where
12+
import Text.Pandoc.Extensions

src/Text/Pandoc/Format/Flavored.hs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{-# LANGUAGE LambdaCase #-}
2+
{-# LANGUAGE OverloadedStrings #-}
3+
{- |
4+
Module : Text.Pandoc.Format.Flavored
5+
Copyright : © 2012-2022 John MacFarlane, 2019-2022 Albert Krewinkel
6+
License : GPL-2.0-or-later
7+
Maintainer : John MacFarlane <[email protected]>
8+
9+
Data structures and functions for representing markup extensions.
10+
-}
11+
module Text.Pandoc.Format.Flavored
12+
( module Text.Pandoc.Format.Extensions
13+
, Flavored (..)
14+
, unflavored
15+
, hasBase
16+
, hasBaseIn
17+
)
18+
where
19+
20+
import Prelude
21+
import Text.Pandoc.Format.Extensions
22+
23+
-- | Full description of a format, including the selected set of extensions.
24+
data Flavored f = KnownFormat f Extensions
25+
| CustomFormat FilePath
26+
deriving (Show, Read, Eq)
27+
28+
unflavored :: Flavored f -> Maybe f
29+
unflavored = \case
30+
KnownFormat f _ -> Just f
31+
_ -> Nothing
32+
33+
hasBase :: Eq f => Flavored f -> f -> Bool
34+
hasBase = \case
35+
KnownFormat f _ -> (f ==)
36+
_ -> const False
37+
38+
hasBaseIn :: Eq f => Flavored f -> [f] -> Bool
39+
hasBaseIn = \case
40+
KnownFormat f _ -> (f `elem`)
41+
_ -> const False

src/Text/Pandoc/Format/Input.hs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE DeriveDataTypeable #-}
3+
{-# LANGUAGE DeriveGeneric #-}
4+
{-# LANGUAGE OverloadedStrings #-}
5+
module Text.Pandoc.Format.Input
6+
( InputFormat (..)
7+
, allInputFormats
8+
, name
9+
, fromName
10+
, defaultFlavor
11+
, flavoredFromSpec
12+
, flavoredFormatFromFilePath
13+
, flavoredFormatFromFilePaths
14+
, module Text.Pandoc.Format.Flavored
15+
) where
16+
17+
import Control.Monad (mplus, unless)
18+
import Control.Monad.Except (throwError)
19+
import Data.Char (toLower)
20+
import Data.Data (Data)
21+
import Data.Map.Strict (Map)
22+
import Data.Set (Set)
23+
import Data.Text (Text)
24+
import Data.Typeable (Typeable)
25+
import GHC.Generics (Generic)
26+
import System.FilePath (takeExtension)
27+
import Text.Pandoc.Error
28+
import Text.Pandoc.Format.Flavored
29+
import Text.Pandoc.Class.PandocMonad (PandocMonad)
30+
import qualified Data.Map.Strict as Map
31+
import qualified Data.Set as Set
32+
import qualified Data.Text as T
33+
34+
-- | Formats that pandoc knows how to parse.
35+
data InputFormat
36+
= BibLaTeX
37+
| BibTeX
38+
| CSV
39+
| CommonMark
40+
| CommonMark_X
41+
| Creole
42+
| CslJson
43+
| DocBook
44+
| Docx
45+
| DokuWiki
46+
| EndNoteXML
47+
| EPUB
48+
| FB2
49+
| GFM
50+
| HTML
51+
| Haddock
52+
| Ipynb
53+
| JATS
54+
| JSON
55+
| Jira
56+
| LaTeX
57+
| Man
58+
| Markdown
59+
| Markdown_GitHub
60+
| Markdown_MMD
61+
| Markdown_PHPExtra
62+
| Markdown_strict
63+
| MediaWiki
64+
| Muse
65+
| Native
66+
| ODT
67+
| OPML
68+
| Org
69+
| RIS
70+
| RST
71+
| TSV
72+
| TWiki
73+
| Textile
74+
| TikiWiki
75+
| T2T -- txt2tags
76+
| Vimwiki
77+
deriving (Bounded, Enum, Eq, Data, Generic, Ord, Read, Show, Typeable)
78+
79+
name :: InputFormat -> Text
80+
name = T.toLower . T.pack . show
81+
82+
-- | List of all formats of which pandoc is aware.
83+
allInputFormats :: Set InputFormat
84+
allInputFormats = Set.fromAscList [minBound .. maxBound]
85+
86+
names :: Map Text InputFormat
87+
names = Map.fromList $
88+
map (\f -> (T.toLower (T.pack $ show f), f)) (Set.toList allInputFormats)
89+
90+
-- | Get a format from a string identifier.
91+
fromName :: Text -> Maybe InputFormat
92+
fromName = flip Map.lookup names
93+
94+
-- | Returns the default flavor of an input format.
95+
defaultFlavor :: InputFormat -> Flavored InputFormat
96+
defaultFlavor f = KnownFormat f (getDefaultExtensions $ name f)
97+
98+
-- | Determine format based on file extensions
99+
flavoredFormatFromFilePaths :: [FilePath] -> Maybe (Flavored InputFormat)
100+
flavoredFormatFromFilePaths =
101+
foldr (mplus . flavoredFormatFromFilePath) Nothing
102+
103+
-- | Determine format based on file extension
104+
flavoredFormatFromFilePath :: FilePath -> Maybe (Flavored InputFormat)
105+
flavoredFormatFromFilePath fp =
106+
let
107+
defaultExts = return . defaultFlavor
108+
modifiedExts f extsMod = return $
109+
KnownFormat f (extsMod . getDefaultExtensions $ name f)
110+
in case takeExtension (map toLower fp) of
111+
".db" -> defaultExts DocBook
112+
".docx" -> defaultExts Docx
113+
".dokuwiki" -> defaultExts DokuWiki
114+
".epub" -> defaultExts EPUB
115+
".fb2" -> defaultExts FB2
116+
".htm" -> defaultExts HTML
117+
".html" -> defaultExts HTML
118+
".json" -> defaultExts JSON
119+
".latex" -> defaultExts LaTeX
120+
".lhs" -> modifiedExts Markdown (enableExtension Ext_literate_haskell)
121+
".ltx" -> defaultExts LaTeX
122+
".markdown" -> defaultExts Markdown
123+
".md" -> defaultExts Markdown
124+
".muse" -> defaultExts Muse
125+
".native" -> defaultExts Native
126+
".opml" -> defaultExts OPML
127+
".org" -> defaultExts Org
128+
".rst" -> defaultExts RST
129+
".t2t" -> defaultExts T2T
130+
".tex" -> defaultExts LaTeX
131+
".text" -> defaultExts Markdown
132+
".textile" -> defaultExts Textile
133+
".txt" -> defaultExts Markdown
134+
".wiki" -> defaultExts MediaWiki
135+
".xhtml" -> defaultExts HTML
136+
['.',y] | y `elem` ['1'..'9'] -> defaultExts Man
137+
_ -> Nothing
138+
139+
-- | Parses an input format spec.
140+
flavoredFromSpec :: PandocMonad m => Text -> m (Flavored InputFormat)
141+
flavoredFromSpec s =
142+
case parseFormatSpec s of
143+
Left e -> throwError $ PandocAppError $ mconcat
144+
[ "Error parsing reader format "
145+
, T.pack (show s)
146+
, ": "
147+
, T.pack (show e)
148+
]
149+
Right (readerName, extsToEnable, extsToDisable) ->
150+
case fromName readerName of
151+
Nothing -> if ".lua" `T.isSuffixOf` readerName
152+
then return $ CustomFormat (T.unpack readerName)
153+
else throwError $ PandocUnknownReaderError readerName
154+
Just inputFormat -> do
155+
let allExts = getAllExtensions readerName
156+
let exts = foldr disableExtension
157+
(foldr enableExtension
158+
(getDefaultExtensions readerName)
159+
extsToEnable) extsToDisable
160+
mapM_ (\ext ->
161+
unless (extensionEnabled ext allExts) $
162+
throwError $
163+
PandocUnsupportedExtensionError
164+
(T.drop 4 $ T.pack $ show ext) readerName)
165+
(extsToEnable ++ extsToDisable)
166+
return $ KnownFormat inputFormat exts

0 commit comments

Comments
 (0)