Skip to content

Commit 3860096

Browse files
committed
Add module Text.Pandoc.Format.Input
A new module Text.Pandoc.Format.Input is added and exposed to library users. Types supported as input format can be represented as value of the *InputFormat* type.
1 parent 41af476 commit 3860096

File tree

6 files changed

+308
-99
lines changed

6 files changed

+308
-99
lines changed

pandoc.cabal

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,9 @@ library
663663
Text.Pandoc.Filter.Environment,
664664
Text.Pandoc.Filter.JSON,
665665
Text.Pandoc.Filter.Lua,
666+
Text.Pandoc.Format.Extensions,
667+
Text.Pandoc.Format.Flavored,
668+
Text.Pandoc.Format.Input,
666669
Text.Pandoc.Parsing.Capabilities,
667670
Text.Pandoc.Parsing.Citations,
668671
Text.Pandoc.Parsing.General,

src/Text/Pandoc/App.hs

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ import System.IO (nativeNewline, stdout)
4949
import qualified System.IO as IO (Newline (..))
5050
import Text.Pandoc
5151
import Text.Pandoc.Builder (setMeta)
52+
import qualified Text.Pandoc.Format.Input as Input
5253
import Text.Pandoc.MediaBag (mediaItems)
5354
import Text.Pandoc.MIME (getCharset, MimeType)
5455
import Text.Pandoc.Image (svgToPng)
55-
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
5656
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
5757
IpynbOutput (..))
5858
import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
@@ -116,18 +116,19 @@ convertWithOpts opts = do
116116
setOutputFile (optOutputFile opts)
117117

118118
-- assign reader and writer based on options and filenames
119-
readerName <- case optFrom opts of
120-
Just f -> return f
121-
Nothing -> case formatFromFilePaths sources of
122-
Just f' -> return f'
123-
Nothing | sources == ["-"] -> return "markdown"
124-
| any (isURI . T.pack) sources -> return "html"
125-
| otherwise -> do
126-
report $ CouldNotDeduceFormat
127-
(map (T.pack . takeExtension) sources) "markdown"
128-
return "markdown"
129-
130-
let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
119+
readerFormat <- case optFrom opts of
120+
Just spec -> Input.flavoredFromSpec spec
121+
Nothing ->
122+
let defFlavor = return . Input.defaultFlavor
123+
in case Input.flavoredFormatFromFilePaths sources of
124+
Just f' -> return f'
125+
Nothing
126+
| sources == ["-"] -> defFlavor Input.Markdown
127+
| any (isURI . T.pack) sources -> defFlavor Input.HTML
128+
| otherwise -> do
129+
report $ CouldNotDeduceFormat
130+
(map (T.pack . takeExtension) sources) "markdown"
131+
defFlavor Input.Markdown
131132

132133
let makeSandboxed pureReader =
133134
let files = maybe id (:) (optReferenceDoc opts) .
@@ -142,14 +143,13 @@ convertWithOpts opts = do
142143
ByteStringReader r
143144
-> ByteStringReader $ \o t -> sandbox files (r o t)
144145

145-
(reader, readerExts) <-
146-
if ".lua" `T.isSuffixOf` readerName
147-
then return (TextReader (readCustom (T.unpack readerName)), mempty)
148-
else if optSandbox opts
149-
then case runPure (getReader readerName) of
150-
Left e -> throwError e
151-
Right (r, rexts) -> return (makeSandboxed r, rexts)
152-
else getReader readerName
146+
(reader, readerExts) <- case readerFormat of
147+
Input.CustomFormat luaFilename ->
148+
return (TextReader (readCustom luaFilename), mempty)
149+
Input.KnownFormat infrmt exts -> return $
150+
if optSandbox opts
151+
then (makeSandboxed (readerForFormat infrmt), exts)
152+
else (readerForFormat infrmt, exts)
153153

154154
outputSettings <- optToOutputSettings opts
155155
let format = outputFormat outputSettings
@@ -169,7 +169,7 @@ convertWithOpts opts = do
169169
pdfOutput ||
170170
bibOutput
171171

172-
when (pdfOutput && readerNameBase == "latex") $
172+
when (pdfOutput && readerFormat `Input.hasBase` Input.LaTeX) $
173173
case optInputFiles opts of
174174
Just (inputFile:_) -> report $ UnusualConversion $ T.pack $
175175
"to convert a .tex file to PDF, you get better results by using pdflatex "
@@ -222,7 +222,7 @@ convertWithOpts opts = do
222222
-- If format is markdown or commonmark, use the enabled extensions,
223223
-- otherwise treat metadata as pandoc markdown (see #7926, #6832)
224224
let readerOptsMeta =
225-
if readerNameBase == "markdown" || readerNameBase == "commonmark"
225+
if readerFormat `Input.hasBaseIn` [Input.Markdown, Input.CommonMark]
226226
then readerOpts
227227
else readerOpts{ readerExtensions = pandocExtensions }
228228
mconcat <$> mapM
@@ -243,7 +243,7 @@ convertWithOpts opts = do
243243
then (eastAsianLineBreakFilter :)
244244
else id) .
245245
(case optIpynbOutput opts of
246-
_ | readerNameBase /= "ipynb" -> id
246+
_ | not (readerFormat `Input.hasBase` Input.Ipynb) -> id
247247
IpynbOutputAll -> id
248248
IpynbOutputNone -> (filterIpynbOutput Nothing :)
249249
IpynbOutputBest -> (filterIpynbOutput (Just $
@@ -257,14 +257,13 @@ convertWithOpts opts = do
257257
$ []
258258

259259
let convertTabs = tabFilter (if optPreserveTabs opts ||
260-
readerNameBase == "t2t" ||
261-
readerNameBase == "man" ||
262-
readerNameBase == "tsv"
260+
readerFormat `Input.hasBaseIn`
261+
[Input.T2T, Input.Man, Input.TSV]
263262
then 0
264263
else optTabStop opts)
265264

266265

267-
when (readerNameBase == "markdown_github" ||
266+
when (readerFormat `Input.hasBase` Input.Markdown_GitHub ||
268267
writerNameBase == "markdown_github") $
269268
report $ Deprecated "markdown_github" "Use gfm instead."
270269

@@ -293,7 +292,7 @@ convertWithOpts opts = do
293292

294293
doc <- (case reader of
295294
TextReader r
296-
| readerNameBase == "json" ->
295+
| readerFormat `Input.hasBase` Input.JSON ->
297296
mconcat <$>
298297
mapM (inputToText convertTabs
299298
>=> r readerOpts . (:[])) inputs
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{- |
2+
Module : Text.Pandoc.Extensions
3+
Copyright : © 2012-2022 John MacFarlane
4+
License : GPL-2.0-or-later
5+
Maintainer : John MacFarlane <[email protected]>
6+
7+
Data structures and functions for representing markup extensions.
8+
-}
9+
module Text.Pandoc.Format.Extensions
10+
( module Text.Pandoc.Extensions )
11+
where
12+
import Text.Pandoc.Extensions

src/Text/Pandoc/Format/Flavored.hs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{-# LANGUAGE LambdaCase #-}
2+
{-# LANGUAGE OverloadedStrings #-}
3+
{- |
4+
Module : Text.Pandoc.Format.Flavored
5+
Copyright : © 2012-2022 John MacFarlane, 2019-2022 Albert Krewinkel
6+
License : GPL-2.0-or-later
7+
Maintainer : John MacFarlane <[email protected]>
8+
9+
Data structures and functions for representing markup extensions.
10+
-}
11+
module Text.Pandoc.Format.Flavored
12+
( module Text.Pandoc.Format.Extensions
13+
, Flavored (..)
14+
, hasBase
15+
, hasBaseIn
16+
)
17+
where
18+
19+
import Prelude
20+
import Text.Pandoc.Format.Extensions
21+
22+
-- | Full description of a format, including the selected set of
23+
-- extensions.
24+
data Flavored f = KnownFormat f Extensions
25+
| CustomFormat FilePath
26+
deriving (Show, Read, Eq)
27+
28+
-- | Returns 'True' if the flavored format is based on the given format,
29+
-- and 'False' otherwise.
30+
hasBase :: Eq f => Flavored f -> f -> Bool
31+
hasBase = \case
32+
KnownFormat f _ -> (f ==)
33+
_ -> const False
34+
35+
-- | Returns 'True' if the flavored format is based on one of the listed
36+
-- format, and 'False' otherwise.
37+
hasBaseIn :: Eq f => Flavored f -> [f] -> Bool
38+
hasBaseIn = \case
39+
KnownFormat f _ -> (f `elem`)
40+
_ -> const False

src/Text/Pandoc/Format/Input.hs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE DeriveDataTypeable #-}
3+
{-# LANGUAGE DeriveGeneric #-}
4+
{-# LANGUAGE OverloadedStrings #-}
5+
module Text.Pandoc.Format.Input
6+
( InputFormat (..)
7+
, allInputFormats
8+
, name
9+
, fromName
10+
, defaultFlavor
11+
, flavoredFromSpec
12+
, flavoredFormatFromFilePath
13+
, flavoredFormatFromFilePaths
14+
, module Text.Pandoc.Format.Flavored
15+
) where
16+
17+
import Control.Monad (unless)
18+
import Control.Monad.Except (throwError)
19+
import Data.Char (toLower)
20+
import Data.Data (Data)
21+
import Data.Map.Strict (Map)
22+
import Data.Monoid (First (..))
23+
import Data.Set (Set)
24+
import Data.Text (Text)
25+
import Data.Typeable (Typeable)
26+
import GHC.Generics (Generic)
27+
import System.FilePath (takeExtension)
28+
import Text.Pandoc.Error
29+
import Text.Pandoc.Format.Flavored
30+
import Text.Pandoc.Class.PandocMonad (PandocMonad)
31+
import qualified Data.Map.Strict as Map
32+
import qualified Data.Set as Set
33+
import qualified Data.Text as T
34+
35+
-- | Formats that pandoc knows how to parse.
36+
data InputFormat
37+
= BibLaTeX
38+
| BibTeX
39+
| CSV
40+
| CommonMark
41+
| CommonMark_X
42+
| Creole
43+
| CslJson
44+
| DocBook
45+
| Docx
46+
| DokuWiki
47+
| EndNoteXML
48+
| EPUB
49+
| FB2
50+
| GFM
51+
| HTML
52+
| Haddock
53+
| Ipynb
54+
| JATS
55+
| JSON
56+
| Jira
57+
| LaTeX
58+
| Man
59+
| Markdown
60+
| Markdown_GitHub
61+
| Markdown_MMD
62+
| Markdown_PHPExtra
63+
| Markdown_strict
64+
| MediaWiki
65+
| Muse
66+
| Native
67+
| ODT
68+
| OPML
69+
| Org
70+
| RIS
71+
| RST
72+
| TSV
73+
| TWiki
74+
| Textile
75+
| TikiWiki
76+
| T2T -- txt2tags
77+
| Vimwiki
78+
deriving (Bounded, Enum, Eq, Data, Generic, Ord, Read, Show, Typeable)
79+
80+
name :: InputFormat -> Text
81+
name = T.toLower . T.pack . show
82+
83+
-- | List of all formats of which pandoc is aware.
84+
allInputFormats :: Set InputFormat
85+
allInputFormats = Set.fromAscList [minBound .. maxBound]
86+
87+
names :: Map Text InputFormat
88+
names = Map.fromList $
89+
map (\f -> (T.toLower (T.pack $ show f), f)) (Set.toList allInputFormats)
90+
91+
-- | Get a format from a string identifier.
92+
fromName :: Text -> Maybe InputFormat
93+
fromName = flip Map.lookup names
94+
95+
-- | Returns the default flavor of an input format.
96+
defaultFlavor :: InputFormat -> Flavored InputFormat
97+
defaultFlavor f = KnownFormat f (getDefaultExtensions $ name f)
98+
99+
-- | Determine format based on file extensions
100+
flavoredFormatFromFilePaths :: [FilePath] -> Maybe (Flavored InputFormat)
101+
flavoredFormatFromFilePaths = getFirst .
102+
foldMap (First . flavoredFormatFromFilePath)
103+
104+
-- | Determine format based on file extension
105+
flavoredFormatFromFilePath :: FilePath -> Maybe (Flavored InputFormat)
106+
flavoredFormatFromFilePath fp =
107+
let
108+
defaultExts = return . defaultFlavor
109+
modifiedExts f extsMod = return $
110+
KnownFormat f (extsMod . getDefaultExtensions $ name f)
111+
in case takeExtension (map toLower fp) of
112+
".db" -> defaultExts DocBook
113+
".docx" -> defaultExts Docx
114+
".dokuwiki" -> defaultExts DokuWiki
115+
".epub" -> defaultExts EPUB
116+
".fb2" -> defaultExts FB2
117+
".htm" -> defaultExts HTML
118+
".html" -> defaultExts HTML
119+
".json" -> defaultExts JSON
120+
".latex" -> defaultExts LaTeX
121+
".lhs" -> modifiedExts Markdown (enableExtension Ext_literate_haskell)
122+
".ltx" -> defaultExts LaTeX
123+
".markdown" -> defaultExts Markdown
124+
".md" -> defaultExts Markdown
125+
".muse" -> defaultExts Muse
126+
".native" -> defaultExts Native
127+
".opml" -> defaultExts OPML
128+
".org" -> defaultExts Org
129+
".rst" -> defaultExts RST
130+
".t2t" -> defaultExts T2T
131+
".tex" -> defaultExts LaTeX
132+
".text" -> defaultExts Markdown
133+
".textile" -> defaultExts Textile
134+
".txt" -> defaultExts Markdown
135+
".wiki" -> defaultExts MediaWiki
136+
".xhtml" -> defaultExts HTML
137+
['.',y] | y `elem` ['1'..'9'] -> defaultExts Man
138+
_ -> Nothing
139+
140+
-- | Parses an input format spec.
141+
flavoredFromSpec :: PandocMonad m => Text -> m (Flavored InputFormat)
142+
flavoredFromSpec s =
143+
case parseFormatSpec s of
144+
Left e -> throwError $ PandocAppError $ mconcat
145+
[ "Error parsing reader format "
146+
, T.pack (show s)
147+
, ": "
148+
, T.pack (show e)
149+
]
150+
Right (readerName, extsToEnable, extsToDisable) ->
151+
case fromName readerName of
152+
Nothing -> if ".lua" `T.isSuffixOf` readerName
153+
then return $ CustomFormat (T.unpack readerName)
154+
else throwError $ PandocUnknownReaderError readerName
155+
Just inputFormat -> do
156+
let allExts = getAllExtensions readerName
157+
let exts = foldr disableExtension
158+
(foldr enableExtension
159+
(getDefaultExtensions readerName)
160+
extsToEnable) extsToDisable
161+
mapM_ (\ext ->
162+
unless (extensionEnabled ext allExts) $
163+
throwError $
164+
PandocUnsupportedExtensionError
165+
(T.drop 4 $ T.pack $ show ext) readerName)
166+
(extsToEnable ++ extsToDisable)
167+
return $ KnownFormat inputFormat exts

0 commit comments

Comments
 (0)