Skip to content

Commit b513a4e

Browse files
feat: improve Pandoc check — 44→46 modules passing, fix loader CPP/diagnostics, char literal lexing, nested where clauses
- Lexer: disambiguate `'['` as char literal vs promoted list by lookahead for closing quote (fixes Text.Pandoc.Writers.Roff)
- Loader: add CPP preprocessing for imported modules (was only done in driver, causing parse failures when loading CPP-using modules as deps)
- Loader: treat parser diagnostics as non-fatal — only fail if module couldn't be parsed at all, matching driver behavior
- Lowering: fix where-clause scoping for multi-equation functions nested in where clauses — per-clause where bindings were silently ignored (fixes Text.Pandoc.Chunks from cascading lowering failure)
- AST: add TupleSections to Extension enum; classify ~30 common GHC extensions as accepted in status() to suppress unknown warnings
- Driver: add TypeErrorWithInfo error variant so type-check failures register exports and don't cascade to downstream modules
- Stubs: add missing exports for Skylighting (sName, sShortname, parseTheme), Text.Collate.Lang (record fields), Data.Yaml (decodeAllEither'), Data.Aeson.TH (camelTo2)
- Type checker: polymorphic Semigroup/Monoid, expanded builtin stubs, cross-module type alias propagation
1 parent c12f43e commit b513a4e

17 files changed

Lines changed: 4512 additions & 591 deletions

File tree

.claude/ROADMAP.md

Lines changed: 503 additions & 257 deletions
Large diffs are not rendered by default.

.claude/TODO-pandoc.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
**Document ID:** BHC-TODO-PANDOC
44
**Status:** In Progress
55
**Created:** 2026-01-30
6-
**Updated:** 2026-02-27
6+
**Updated:** 2026-03-04
7+
8+
**See also:** [ROADMAP.md](ROADMAP.md) — GHC boot library vs third-party package
9+
classification and milestone plan (P1–P5).
710

811
---
912

crates/bhc-ast/src/lib.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,8 @@ pub enum Extension {
190190
NamedFieldPuns,
191191
/// Overloaded strings
192192
OverloadedStrings,
193+
/// Tuple sections
194+
TupleSections,
193195
/// Overloaded lists
194196
OverloadedLists,
195197
/// Numeric underscores
@@ -309,6 +311,7 @@ impl Extension {
309311
| Self::RecordWildCards
310312
| Self::NamedFieldPuns
311313
| Self::OverloadedStrings
314+
| Self::TupleSections
312315
| Self::OverloadedLists
313316
| Self::NumericUnderscores
314317
| Self::BinaryLiterals
@@ -356,8 +359,26 @@ impl Extension {
356359
| Self::QuasiQuotes
357360
| Self::NamedDefaults => ExtensionStatus::Unimplemented,
358361

359-
// Unknown
360-
Self::Unknown(_) => ExtensionStatus::Unknown,
362+
// Unknown — check if it's a recognized GHC extension we silently accept
363+
Self::Unknown(name) => {
364+
match name.as_str() {
365+
// Common GHC extensions we silently accept for compatibility
366+
"Arrows" | "MonoLocalBinds" | "ImpredicativeTypes"
367+
| "TypeFamilyDependencies" | "DeriveAnyClass" | "RoleAnnotations"
368+
| "AllowAmbiguousTypes" | "TypeSynonymInstances" | "PackageImports"
369+
| "NoImplicitPrelude" | "NoMonomorphismRestriction"
370+
| "DisambiguateRecordFields" | "DuplicateRecordFields"
371+
| "ApplicativeDo" | "NumDecimals" | "MagicHash"
372+
| "UnboxedTuples" | "TypeInType" | "PolyKinds"
373+
| "StarIsType" | "ImportQualifiedPost"
374+
| "StandaloneKindSignatures" | "QuantifiedConstraints"
375+
| "LinearTypes" | "UnicodeSyntax" | "ParallelListComp"
376+
| "TransformListComp" | "MonadComprehensions"
377+
| "ExtendedDefaultRules" | "PostfixOperators"
378+
| "ScopedTypeVariables" | "NoFieldSelectors" => ExtensionStatus::Supported,
379+
_ => ExtensionStatus::Unknown,
380+
}
381+
}
361382
}
362383
}
363384

@@ -392,6 +413,7 @@ impl Extension {
392413
"RecordWildCards" => Self::RecordWildCards,
393414
"NamedFieldPuns" => Self::NamedFieldPuns,
394415
"OverloadedStrings" => Self::OverloadedStrings,
416+
"TupleSections" => Self::TupleSections,
395417
"OverloadedLists" => Self::OverloadedLists,
396418
"NumericUnderscores" => Self::NumericUnderscores,
397419
"HexFloatLiterals" => Self::HexFloatLiterals,

crates/bhc-driver/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ bhc-hir = { workspace = true }
1919
bhc-lower = { workspace = true }
2020
bhc-hir-to-core = { workspace = true }
2121
bhc-typeck = { workspace = true }
22+
bhc-types = { workspace = true }
2223
bhc-intern = { workspace = true }
2324

2425
# Internal crates - IR pipeline

crates/bhc-driver/src/lib.rs

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@ pub enum CompileError {
112112
#[error("type checking failed: {0} errors")]
113113
TypeError(usize),
114114

115+
/// Type checking failed but module exports are still available.
116+
/// This allows downstream modules to continue checking.
117+
#[error("type checking failed: {count} errors")]
118+
TypeErrorWithInfo {
119+
/// Number of type errors.
120+
count: usize,
121+
/// Module info with exports (available because HIR parsed/lowered successfully).
122+
info: CompiledModuleInfo,
123+
},
124+
115125
/// HIR to Core lowering failed.
116126
#[error("core lowering failed: {0}")]
117127
CoreLowerError(#[from] bhc_hir_to_core::LowerError),
@@ -281,6 +291,8 @@ pub struct CompiledModuleInfo {
281291
pub symbols: Vec<CompiledSymbol>,
282292
/// Module exports for feeding into the lowering context of later modules.
283293
pub exports: ModuleExports,
294+
/// Type aliases defined in this module, for cross-module alias expansion.
295+
pub type_aliases: Vec<(bhc_intern::Symbol, Vec<bhc_types::TyVar>, bhc_types::Ty)>,
284296
}
285297

286298
/// Accumulates compilation artifacts across modules during multi-module compilation.
@@ -1392,6 +1404,7 @@ impl Compiler {
13921404
module_name: module_name.to_string(),
13931405
symbols: compiled_symbols,
13941406
exports,
1407+
type_aliases: Vec::new(),
13951408
};
13961409

13971410
Ok((object_path, compiled_info))
@@ -1545,16 +1558,59 @@ impl Compiler {
15451558
// Phase 2: Lower AST to HIR with registry context
15461559
let (hir, lower_ctx) = self.lower_with_registry(&ast, registry)?;
15471560

1548-
// Phase 3: Type check HIR
1549-
let _typed = self.type_check(&hir, file_id, &lower_ctx)?;
1561+
// Phase 3: Collect type aliases from imported modules
1562+
let mut imported_aliases = Vec::new();
1563+
for (_mod_name, info) in &registry.modules {
1564+
for (name, params, ty) in &info.type_aliases {
1565+
imported_aliases.push((*name, params.clone(), ty.clone()));
1566+
}
1567+
}
15501568

1551-
// Build exports for downstream modules (no Core IR needed)
1569+
// Phase 4: Type check HIR with imported type aliases
1570+
let type_errors = match bhc_typeck::type_check_module_full(&hir, file_id, Some(&lower_ctx.defs), &imported_aliases) {
1571+
Ok(_typed) => None,
1572+
Err(diagnostics) => {
1573+
eprintln!("Type errors:");
1574+
for (i, diag) in diagnostics.iter().enumerate() {
1575+
eprintln!(" {}: {}", i + 1, diag.message);
1576+
}
1577+
Some(diagnostics.len())
1578+
}
1579+
};
1580+
1581+
// Build exports for downstream modules (no Core IR needed).
1582+
// We build exports even if type checking failed, since exports come from
1583+
// the HIR (which parsed and lowered successfully). This allows downstream
1584+
// modules to still be checked rather than being skipped entirely.
15521585
let exports = Self::build_module_exports_from_hir(module_name, &hir, &lower_ctx);
15531586

1587+
// Extract type aliases for cross-module propagation
1588+
let mut type_aliases = Vec::new();
1589+
for item in &hir.items {
1590+
if let bhc_hir::Item::TypeAlias(alias) = item {
1591+
type_aliases.push((alias.name, alias.params.clone(), alias.ty.clone()));
1592+
}
1593+
}
1594+
1595+
// If there were type errors, return both the compiled info and the error.
1596+
// The caller can still register exports while reporting the failure.
1597+
if let Some(count) = type_errors {
1598+
return Err(CompileError::TypeErrorWithInfo {
1599+
count,
1600+
info: CompiledModuleInfo {
1601+
module_name: module_name.to_string(),
1602+
symbols: Vec::new(),
1603+
exports,
1604+
type_aliases,
1605+
},
1606+
});
1607+
}
1608+
15541609
Ok(CompiledModuleInfo {
15551610
module_name: module_name.to_string(),
15561611
symbols: Vec::new(),
15571612
exports,
1613+
type_aliases,
15581614
})
15591615
}
15601616

@@ -2877,6 +2933,7 @@ impl Compiler {
28772933
"Foreign.C",
28782934
"Foreign.C.Types",
28792935
"Foreign.C.String",
2936+
"Foreign.C.Error",
28802937
"Foreign.Storable",
28812938
"Foreign.Marshal",
28822939
"Foreign.Marshal.Alloc",
@@ -3220,7 +3277,9 @@ impl Compiler {
32203277
continue;
32213278
}
32223279

3223-
// Skip if any local dependency failed
3280+
// Skip if any local dependency failed to parse/lower (not type-check).
3281+
// Type-check failures don't block downstream: those modules still have
3282+
// their exports registered in the registry via TypeErrorWithInfo below.
32243283
let dep_failed = imports
32253284
.iter()
32263285
.any(|imp| failed_modules.contains(imp.as_str()));
@@ -3241,6 +3300,15 @@ impl Compiler {
32413300
registry.modules.insert(mod_name.clone(), compiled_info);
32423301
results.push((mod_name.clone(), Ok(())));
32433302
}
3303+
Err(CompileError::TypeErrorWithInfo { count, info }) => {
3304+
// Type checking failed, but parsing and lowering succeeded.
3305+
// Register the module's exports so downstream modules can
3306+
// still be checked (they won't be skipped due to dependency failure).
3307+
registry.modules.insert(mod_name.clone(), info);
3308+
// Still mark as failed for reporting purposes, but do NOT
3309+
// add to failed_modules so downstream modules aren't skipped.
3310+
results.push((mod_name.clone(), Err(CompileError::TypeError(count))));
3311+
}
32443312
Err(e) => {
32453313
failed_modules.insert(mod_name.clone());
32463314
results.push((mod_name.clone(), Err(e)));

crates/bhc-lexer/src/lib.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -988,10 +988,15 @@ impl<'src> Lexer<'src> {
988988
fn lex_char(&mut self, _start: usize) -> Token {
989989
self.advance(); // Opening '
990990

991-
// M9: Check for promoted list syntax '[
991+
// M9: Check for promoted list syntax '[ vs char literal '['
992992
if self.peek() == Some('[') {
993-
self.advance();
994-
return Token::new(TokenKind::TickLBracket);
993+
// Lookahead: '[' followed by closing quote means char literal '['
994+
if self.peek2() == Some('\'') {
995+
// It's the char literal '[' — fall through to normal char handling
996+
} else {
997+
self.advance();
998+
return Token::new(TokenKind::TickLBracket);
999+
}
9951000
}
9961001

9971002
let c = match self.peek() {

crates/bhc-lower/src/context.rs

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ pub struct Scope {
9595
/// Parent scope, if any.
9696
pub parent: Option<ScopeId>,
9797
/// Value bindings (variables, functions).
98-
values: FxHashMap<Symbol, DefId>,
98+
pub values: FxHashMap<Symbol, DefId>,
9999
/// Type bindings (types, type constructors).
100100
types: FxHashMap<Symbol, DefId>,
101101
/// Constructor bindings.
@@ -1814,6 +1814,13 @@ impl LowerContext {
18141814
"Full",
18151815
// System.Locale
18161816
"LocaleCategory",
1817+
// Text.Parsec
1818+
"SourcePos",
1819+
"ParsecT",
1820+
"ParseError",
1821+
// Network.URI
1822+
"URI",
1823+
"URIAuth",
18171824
];
18181825

18191826
for name in stub_types {
@@ -2320,6 +2327,16 @@ impl LowerContext {
23202327
&self.scopes[self.current_scope.index()]
23212328
}
23222329

2330+
/// Gets the current scope ID (for debugging).
2331+
pub fn current_scope_id(&self) -> ScopeId {
2332+
self.current_scope
2333+
}
2334+
2335+
/// Gets all scopes (for debugging).
2336+
pub fn all_scopes(&self) -> &[Scope] {
2337+
&self.scopes
2338+
}
2339+
23232340
/// Gets the current scope mutably.
23242341
pub fn current_scope_mut(&mut self) -> &mut Scope {
23252342
let idx = self.current_scope.index();
@@ -2441,26 +2458,36 @@ impl LowerContext {
24412458
///
24422459
/// Returns the DefId if found, or None if not resolvable.
24432460
pub fn resolve_qualified_var(&self, qualifier: Symbol, name: Symbol) -> Option<DefId> {
2444-
// First, check if the qualifier is an alias
2445-
let module = self
2446-
.import_aliases
2447-
.get(&qualifier)
2448-
.copied()
2449-
.unwrap_or(qualifier);
2450-
2451-
// Try to look up as "Module.name"
2452-
let qualified_name = Symbol::intern(&format!("{}.{}", module.as_str(), name.as_str()));
2453-
2454-
// Check if we have a qualified name mapping
2455-
if let Some(unqualified) = self.qualified_names.get(&qualified_name) {
2461+
// First, try direct lookup of "Qualifier.name" as registered during import processing.
2462+
// This handles cases where multiple modules share the same qualifier alias
2463+
// (e.g., `import qualified Data.Text as T` and `import qualified Data.Text.Encoding as T`).
2464+
let aliased_name = Symbol::intern(&format!("{}.{}", qualifier.as_str(), name.as_str()));
2465+
if let Some(def_id) = self.lookup_value(aliased_name) {
2466+
return Some(def_id);
2467+
}
2468+
2469+
// Check if we have a qualified name mapping for the aliased name
2470+
if let Some(unqualified) = self.qualified_names.get(&aliased_name) {
24562471
if let Some(def_id) = self.lookup_value(*unqualified) {
24572472
return Some(def_id);
24582473
}
24592474
}
24602475

2461-
// Try direct lookup of the qualified name
2462-
if let Some(def_id) = self.lookup_value(qualified_name) {
2463-
return Some(def_id);
2476+
// Then, check if the qualifier is an alias and try the full module name
2477+
if let Some(&module) = self.import_aliases.get(&qualifier) {
2478+
let qualified_name = Symbol::intern(&format!("{}.{}", module.as_str(), name.as_str()));
2479+
2480+
// Check if we have a qualified name mapping
2481+
if let Some(unqualified) = self.qualified_names.get(&qualified_name) {
2482+
if let Some(def_id) = self.lookup_value(*unqualified) {
2483+
return Some(def_id);
2484+
}
2485+
}
2486+
2487+
// Try direct lookup of the qualified name
2488+
if let Some(def_id) = self.lookup_value(qualified_name) {
2489+
return Some(def_id);
2490+
}
24642491
}
24652492

24662493
// Try looking up the unqualified name directly (for builtins)
@@ -2471,26 +2498,32 @@ impl LowerContext {
24712498
///
24722499
/// Returns the DefId if found, or None if not resolvable.
24732500
pub fn resolve_qualified_constructor(&self, qualifier: Symbol, name: Symbol) -> Option<DefId> {
2474-
// First, check if the qualifier is an alias
2475-
let module = self
2476-
.import_aliases
2477-
.get(&qualifier)
2478-
.copied()
2479-
.unwrap_or(qualifier);
2480-
2481-
// Try to look up as "Module.Name"
2482-
let qualified_name = Symbol::intern(&format!("{}.{}", module.as_str(), name.as_str()));
2483-
2484-
// Check if we have a qualified name mapping
2485-
if let Some(unqualified) = self.qualified_names.get(&qualified_name) {
2501+
// First, try direct lookup of "Qualifier.Name" as registered during import processing.
2502+
let aliased_name = Symbol::intern(&format!("{}.{}", qualifier.as_str(), name.as_str()));
2503+
if let Some(def_id) = self.lookup_constructor(aliased_name) {
2504+
return Some(def_id);
2505+
}
2506+
2507+
// Check qualified name mapping for the aliased name
2508+
if let Some(unqualified) = self.qualified_names.get(&aliased_name) {
24862509
if let Some(def_id) = self.lookup_constructor(*unqualified) {
24872510
return Some(def_id);
24882511
}
24892512
}
24902513

2491-
// Try direct lookup of the qualified name
2492-
if let Some(def_id) = self.lookup_constructor(qualified_name) {
2493-
return Some(def_id);
2514+
// Then, check if the qualifier is an alias and try the full module name
2515+
if let Some(&module) = self.import_aliases.get(&qualifier) {
2516+
let qualified_name = Symbol::intern(&format!("{}.{}", module.as_str(), name.as_str()));
2517+
2518+
if let Some(unqualified) = self.qualified_names.get(&qualified_name) {
2519+
if let Some(def_id) = self.lookup_constructor(*unqualified) {
2520+
return Some(def_id);
2521+
}
2522+
}
2523+
2524+
if let Some(def_id) = self.lookup_constructor(qualified_name) {
2525+
return Some(def_id);
2526+
}
24942527
}
24952528

24962529
// Try looking up the unqualified name directly (for builtins)

0 commit comments

Comments
 (0)