Skip to content

Commit 9dfa6e6

Browse files
committed
test(pdf): add unit tests for PDF anchor extraction and resolution
1 parent 70f9621 commit 9dfa6e6

File tree

3 files changed

+401
-19
lines changed

3 files changed

+401
-19
lines changed

Sources/Navigator/PDF/PDFAnchorExtractor.swift

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,8 @@ public struct PDFAnchorExtractor: Loggable {
153153
}
154154

155155
/// Extracts text context around the given range.
156-
private static func extractContext(
156+
/// - Note: Internal for testing.
157+
static func extractContext(
157158
around range: Range<Int>,
158159
in text: String,
159160
contextLength: Int
@@ -184,7 +185,8 @@ public struct PDFAnchorExtractor: Loggable {
184185
}
185186

186187
/// Checks if two bounds are approximately equal within a tolerance.
187-
private static func boundsApproximatelyEqual(
188+
/// - Note: Internal for testing.
189+
static func boundsApproximatelyEqual(
188190
_ a: CGRect,
189191
_ b: CGRect,
190192
tolerance: CGFloat

Sources/Navigator/PDF/PDFAnchorResolver.swift

Lines changed: 30 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ public struct PDFAnchorResolver: Loggable {
3939
}
4040

4141
// Strategy 1: Try quads first (pixel-perfect)
42-
if let bounds = resolveFromQuads(anchor) {
42+
if let bounds = resolveFromQuads(anchor.quads) {
4343
log(.debug, "Resolved PDF highlight from quads")
4444
return bounds
4545
}
@@ -60,9 +60,11 @@ public struct PDFAnchorResolver: Loggable {
6060
return []
6161
}
6262

63-
// MARK: - Private Resolution Methods
63+
// MARK: - Resolution Methods (internal for testing)
6464

65-
private static func parseAnchor(_ data: Any) -> ParsedAnchor? {
65+
/// Parses anchor data from dictionary or JSON string format.
66+
/// - Note: Internal for testing.
67+
static func parseAnchor(_ data: Any) -> ParsedAnchor? {
6668
// Handle both dictionary and JSON string formats
6769
let dict: [String: Any]
6870
if let d = data as? [String: Any] {
@@ -90,7 +92,9 @@ public struct PDFAnchorResolver: Loggable {
9092
)
9193
}
9294

93-
private static func parseQuads(_ data: Any?) -> [[CGPoint]]? {
95+
/// Parses quad coordinate data.
96+
/// - Note: Internal for testing.
97+
static func parseQuads(_ data: Any?) -> [[CGPoint]]? {
9498
guard let quadsArray = data as? [[[String: Double]]] else {
9599
return nil
96100
}
@@ -104,8 +108,10 @@ public struct PDFAnchorResolver: Loggable {
104108
}
105109
}
106110

107-
private static func resolveFromQuads(_ anchor: ParsedAnchor) -> [CGRect]? {
108-
guard let quads = anchor.quads, !quads.isEmpty else {
111+
/// Resolves bounds from quad coordinates.
112+
/// - Note: Internal for testing.
113+
static func resolveFromQuads(_ quads: [[CGPoint]]?) -> [CGRect]? {
114+
guard let quads = quads, !quads.isEmpty else {
109115
return nil
110116
}
111117

@@ -182,8 +188,8 @@ public struct PDFAnchorResolver: Loggable {
182188

183189
// Multiple occurrences: score by context match
184190
let bestRange = ranges.max { range1, range2 in
185-
contextScore(for: range1, anchor: anchor, in: pageText) <
186-
contextScore(for: range2, anchor: anchor, in: pageText)
191+
contextScore(for: range1, textBefore: anchor.textBefore, textAfter: anchor.textAfter, in: pageText) <
192+
contextScore(for: range2, textBefore: anchor.textBefore, textAfter: anchor.textAfter, in: pageText)
187193
}
188194

189195
guard let best = bestRange else { return nil }
@@ -196,14 +202,17 @@ public struct PDFAnchorResolver: Loggable {
196202
return boundsFromSelection(selection, page: page)
197203
}
198204

199-
private static func contextScore(
205+
/// Calculates a score for how well a range matches the expected context.
206+
/// - Note: Internal for testing.
207+
static func contextScore(
200208
for range: Range<String.Index>,
201-
anchor: ParsedAnchor,
209+
textBefore: String?,
210+
textAfter: String?,
202211
in text: String
203212
) -> Int {
204213
var score = 0
205214

206-
if let before = anchor.textBefore {
215+
if let before = textBefore {
207216
let contextLength = before.count
208217
let contextStart = text.index(
209218
range.lowerBound,
@@ -221,7 +230,7 @@ public struct PDFAnchorResolver: Loggable {
221230
}
222231
}
223232

224-
if let after = anchor.textAfter {
233+
if let after = textAfter {
225234
let contextLength = after.count
226235
let contextEnd = text.index(
227236
range.upperBound,
@@ -318,14 +327,16 @@ public struct PDFAnchorResolver: Loggable {
318327
return []
319328
}
320329

321-
/// Normalizes whitespace by collapsing multiple spaces/newlines into single spaces
322-
private static func normalizeWhitespace(_ text: String) -> String {
330+
/// Normalizes whitespace by collapsing multiple spaces/newlines into single spaces.
331+
/// - Note: Internal for testing.
332+
static func normalizeWhitespace(_ text: String) -> String {
323333
let components = text.components(separatedBy: .whitespacesAndNewlines)
324334
return components.filter { !$0.isEmpty }.joined(separator: " ")
325335
}
326336

327-
/// Extracts the first N words from text
328-
private static func extractFirstWords(from text: String, count: Int) -> String {
337+
/// Extracts the first N words from text.
338+
/// - Note: Internal for testing.
339+
static func extractFirstWords(from text: String, count: Int) -> String {
329340
let words = text.split(separator: " ", omittingEmptySubsequences: true)
330341
let firstWords = words.prefix(count)
331342
return firstWords.joined(separator: " ")
@@ -382,7 +393,9 @@ public struct PDFAnchorResolver: Loggable {
382393

383394
// MARK: - Internal Types
384395

385-
private struct ParsedAnchor {
396+
/// Parsed anchor data structure.
397+
/// - Note: Internal for testing.
398+
struct ParsedAnchor {
386399
let pageIndex: Int?
387400
let quads: [[CGPoint]]?
388401
let characterStart: Int?

0 commit comments

Comments
 (0)