|
| 1 | +// |
| 2 | +// Copyright 2026 Readium Foundation. All rights reserved. |
| 3 | +// Use of this source code is governed by the BSD-style license |
| 4 | +// available in the top-level LICENSE file of the project. |
| 5 | +// |
| 6 | + |
| 7 | +import Foundation |
| 8 | +import ReadiumFuzi |
| 9 | +import ReadiumShared |
| 10 | + |
/// Reads the `ComicInfo.xml` metadata file found in CBZ archives.
///
/// The ComicInfo format originates from the ComicRack application.
/// See: https://anansi-project.github.io/docs/comicinfo/documentation
struct ComicInfoParser {
    /// Decodes the raw content of a `ComicInfo.xml` file.
    ///
    /// - Parameters:
    ///   - data: Bytes of the `ComicInfo.xml` file.
    ///   - warnings: Optional logger which receives a `ComicInfoWarning` when
    ///     the data cannot be used.
    /// - Returns: The parsed metadata, or `nil` when the data cannot be read
    ///   as an XML document or when its root element is not `<ComicInfo>`.
    static func parse(data: Data, warnings: WarningLogger?) -> ComicInfo? {
        // Reports the problem to the logger (if any) and yields the `nil`
        // result shared by every failure path.
        func fail(_ message: String) -> ComicInfo? {
            warnings?.log(ComicInfoWarning(message: message))
            return nil
        }

        guard let xml = try? XMLDocument(data: data) else {
            return fail("Failed to parse ComicInfo.xml")
        }

        guard let rootElement = xml.root, rootElement.tag == "ComicInfo" else {
            return fail("ComicInfo.xml root element is not <ComicInfo>")
        }

        return ComicInfo(element: rootElement)
    }
}
| 32 | + |
/// Non-fatal issue reported while reading a ComicInfo.xml file.
struct ComicInfoWarning: Warning {
    /// Human-readable description of the issue.
    let message: String

    /// ComicInfo warnings are always reported as `.minor`.
    var severity: WarningSeverityLevel {
        return .minor
    }

    /// Tag shared by every ComicInfo warning.
    var tag: String {
        return "comicinfo"
    }
}
| 39 | + |
/// Parsed representation of ComicInfo.xml data.
///
/// Only metadata fields that map to RWPM are exposed as first-class properties.
/// All other fields are available in the `otherMetadata` dictionary.
///
/// See https://anansi-project.github.io/docs/comicinfo/documentation
struct ComicInfo {
    /// Title of the book.
    var title: String?

    /// Title of the series the book is part of.
    var series: String?

    /// Number of the book in the series.
    var number: String?

    /// Alternate series name, used for cross-over story arcs.
    var alternateSeries: String?

    /// Number of the book in the alternate series.
    var alternateNumber: String?

    /// A description or summary of the book.
    var summary: String?

    /// Person or organization responsible for publishing, releasing, or
    /// issuing a resource.
    var publisher: String?

    /// An imprint is a group of publications under the umbrella of a larger
    /// imprint or publisher.
    var imprint: String?

    /// Release year of the book.
    var year: Int?

    /// Release month of the book.
    var month: Int?

    /// Release day of the book.
    var day: Int?

    /// Language of the book using IETF BCP 47 language tags.
    var languageISO: String?

    /// Global Trade Item Number identifying the book (ISBN, EAN, etc.).
    var gtin: String?

    /// People or organizations responsible for creating the scenario.
    var writers: [String] = []

    /// People or organizations responsible for drawing the art.
    var pencillers: [String] = []

    /// People or organizations responsible for inking the pencil art.
    var inkers: [String] = []

    /// People or organizations responsible for applying color to drawings.
    var colorists: [String] = []

    /// People or organizations responsible for drawing text and speech bubbles.
    var letterers: [String] = []

    /// People or organizations responsible for drawing the cover art.
    var coverArtists: [String] = []

    /// People or organizations responsible for preparing the resource for
    /// production.
    var editors: [String] = []

    /// People or organizations responsible for rendering text from one language
    /// into another.
    var translators: [String] = []

    /// Genres of the book or series (e.g., Science-Fiction, Shonen).
    var genres: [String] = []

    /// Whether the book is a manga. The value `.yesAndRightToLeft` indicates
    /// right-to-left reading direction.
    var manga: Manga?

    /// Page information parsed from the <Pages> element.
    var pages: [PageInfo] = []

    /// Returns the first page with the given type, if any.
    func firstPageWithType(_ type: PageType) -> PageInfo? {
        pages.first { $0.type == type }
    }

    /// All other metadata fields not directly mapped to RWPM.
    ///
    /// Keys are the XML tag names (e.g., "Volume", "Characters", "AgeRating").
    /// Values are strings as they appear in the XML.
    var otherMetadata: [String: String] = [:]

    /// URL prefix for otherMetadata keys when converting to RWPM.
    private static let otherMetadataPrefix = "https://anansi-project.github.io/docs/comicinfo/documentation#"

    /// Creates a `ComicInfo` from the root `<ComicInfo>` XML element.
    ///
    /// Every direct child element is matched by its tag name. `<Pages>` is
    /// read from its `<Page>` children; any other child whose trimmed text is
    /// empty is skipped. Tags without a dedicated property are stored in
    /// `otherMetadata`. A later child overwrites the value set by an earlier
    /// child with the same tag.
    ///
    /// - Parameter element: The `<ComicInfo>` element to read from.
    init(element: ReadiumFuzi.XMLElement) {
        for child in element.children {
            guard let tag = child.tag else { continue }

            // Pages element has no text content, only child elements
            if tag == "Pages" {
                pages = child.children(tag: "Page").compactMap { PageInfo(element: $0) }
                continue
            }

            let value = child.stringValue.trimmingCharacters(in: .whitespacesAndNewlines)
            guard !value.isEmpty else { continue }

            switch tag {
            // Core
            case "AlternateNumber": alternateNumber = value
            case "AlternateSeries": alternateSeries = value
            case "Day": day = Int(value)
            case "GTIN": gtin = value
            case "Genre": genres = value.splitComma()
            case "Imprint": imprint = value
            case "LanguageISO": languageISO = value
            case "Manga": manga = Manga(rawValue: value)
            case "Month": month = Int(value)
            case "Number": number = value
            case "Publisher": publisher = value
            case "Series": series = value
            case "Summary": summary = value
            case "Title": title = value
            case "Year": year = Int(value)

            // Contributors
            case "Colorist": colorists = value.splitComma()
            case "CoverArtist": coverArtists = value.splitComma()
            case "Editor": editors = value.splitComma()
            case "Inker": inkers = value.splitComma()
            case "Letterer": letterers = value.splitComma()
            case "Penciller": pencillers = value.splitComma()
            case "Translator": translators = value.splitComma()
            case "Writer": writers = value.splitComma()

            // Everything else goes to otherMetadata
            default: otherMetadata[tag] = value
            }
        }
    }

    /// Converts to RWPM Metadata.
    ///
    /// - `gtin` becomes the identifier and the publication is declared as
    ///   conforming to `.divina`.
    /// - `year`/`month`/`day` are combined into the `published` date.
    /// - `series` and `alternateSeries` become `belongsToSeries` entries,
    ///   positioned with `number` and `alternateNumber` when these are finite
    ///   numbers.
    /// - Cover artists are exposed as contributors with the `cov` role.
    /// - The reading progression is `.rtl` only when `manga` is
    ///   `.yesAndRightToLeft`, otherwise `.auto`.
    /// - Each `otherMetadata` key is lowercased and prefixed with the
    ///   ComicInfo documentation URL.
    func toMetadata() -> Metadata {
        // Parse series
        var belongsToSeries: [Contributor] = []
        if let series = series {
            belongsToSeries.append(Contributor(
                name: series,
                position: Self.seriesPosition(from: number)
            ))
        }
        if let alternateSeries = alternateSeries {
            belongsToSeries.append(Contributor(
                name: alternateSeries,
                position: Self.seriesPosition(from: alternateNumber)
            ))
        }

        // Build other metadata with specification URL prefix
        var rwpmOtherMetadata: [String: Any] = [:]
        for (key, value) in otherMetadata {
            rwpmOtherMetadata[Self.otherMetadataPrefix + key.lowercased()] = value
        }

        return Metadata(
            identifier: gtin,
            conformsTo: [.divina],
            title: title,
            published: publishedDate,
            languages: languageISO.map { [$0] } ?? [],
            subjects: genres.map { Subject(name: $0) },
            authors: writers.map { Contributor(name: $0) },
            translators: translators.map { Contributor(name: $0) },
            editors: editors.map { Contributor(name: $0) },
            letterers: letterers.map { Contributor(name: $0) },
            pencilers: pencillers.map { Contributor(name: $0) },
            colorists: colorists.map { Contributor(name: $0) },
            inkers: inkers.map { Contributor(name: $0) },
            contributors: coverArtists.map { Contributor(name: $0, role: "cov") },
            publishers: publisher.map { [Contributor(name: $0)] } ?? [],
            imprints: imprint.map { [Contributor(name: $0)] } ?? [],
            readingProgression: (manga == .yesAndRightToLeft) ? .rtl : .auto,
            description: summary,
            belongsToSeries: belongsToSeries,
            otherMetadata: rwpmOtherMetadata
        )
    }

    /// Publication date built from `year`, `month` and `day`.
    ///
    /// Returns `nil` when `year` is missing. A missing `month` or `day`
    /// defaults to 1.
    private var publishedDate: Date? {
        guard let year = year else { return nil }

        // ComicInfo carries a plain calendar date without any time zone.
        // Resolving it in UTC gives every device the same instant, instead of
        // a value shifted by the device's current time zone.
        var calendar = Calendar(identifier: .gregorian)
        if let utc = TimeZone(secondsFromGMT: 0) {
            calendar.timeZone = utc
        }

        var components = DateComponents()
        components.year = year
        components.month = month ?? 1
        components.day = day ?? 1
        return calendar.date(from: components)
    }

    /// Parses an issue number into a series position.
    ///
    /// `Double(_:)` also accepts text such as "nan" or "infinity"; these
    /// non-finite values are discarded as they are not meaningful positions.
    ///
    /// - Parameter number: Raw content of a `Number` or `AlternateNumber` tag.
    /// - Returns: The finite numeric value, or `nil` when `number` is missing
    ///   or is not a finite number.
    private static func seriesPosition(from number: String?) -> Double? {
        guard
            let number = number,
            let position = Double(number),
            position.isFinite
        else {
            return nil
        }
        return position
    }

    // MARK: - ComicInfo Types

    /// Page type values from the ComicInfo specification.
    ///
    /// See: https://anansi-project.github.io/docs/comicinfo/documentation#type
    enum PageType: Hashable, Sendable {
        case frontCover
        case innerCover
        case roundup
        case story
        case advertisement
        case editorial
        case letters
        case preview
        case backCover
        case other
        case deleted

        /// Case-insensitive initializer.
        ///
        /// Returns `nil` when `rawValue` is not a known page type. Both
        /// "deleted" and "delete" map to `.deleted`.
        init?(rawValue: String) {
            switch rawValue.lowercased() {
            case "frontcover": self = .frontCover
            case "innercover": self = .innerCover
            case "roundup": self = .roundup
            case "story": self = .story
            case "advertisement": self = .advertisement
            case "editorial": self = .editorial
            case "letters": self = .letters
            case "preview": self = .preview
            case "backcover": self = .backCover
            case "other": self = .other
            case "deleted", "delete": self = .deleted
            default: return nil
            }
        }
    }

    /// Information about a single page from ComicInfo.xml.
    ///
    /// See: https://anansi-project.github.io/docs/comicinfo/documentation#pages--comicpageinfo
    struct PageInfo: Hashable, Sendable {
        /// Zero-based index of this page in the reading order.
        let image: Int

        /// The type/purpose of this page.
        let type: PageType?

        /// Whether this is a double-page spread.
        let doublePage: Bool?

        /// File size in bytes.
        let imageSize: Int64?

        /// Page key/identifier.
        let key: String?

        /// Bookmark name for this page.
        let bookmark: String?

        /// Width of the page image in pixels.
        let imageWidth: Int?

        /// Height of the page image in pixels.
        let imageHeight: Int?

        /// Parses a PageInfo from an XML <Page> element.
        ///
        /// Fails when the `Image` attribute is missing, is not an integer, or
        /// is negative. Every other attribute is optional and left `nil` when
        /// missing or unparsable.
        init?(element: ReadiumFuzi.XMLElement) {
            guard
                let imageStr = element.attr("Image"),
                let image = Int(imageStr),
                // `image` is a zero-based index, a negative value can never
                // designate a page.
                image >= 0
            else {
                return nil
            }

            self.image = image
            type = element.attr("Type").flatMap { PageType(rawValue: $0) }
            doublePage = element.attr("DoublePage").flatMap { Self.parseBoolean($0) }
            imageSize = element.attr("ImageSize").flatMap { Int64($0) }
            key = element.attr("Key")
            bookmark = element.attr("Bookmark")
            imageWidth = element.attr("ImageWidth").flatMap { Int($0) }
            imageHeight = element.attr("ImageHeight").flatMap { Int($0) }
        }

        /// Parses a boolean attribute, case-insensitively.
        ///
        /// Accepts "true"/"1" and "false"/"0"; anything else yields `nil`.
        private static func parseBoolean(_ raw: String) -> Bool? {
            switch raw.lowercased() {
            case "true", "1": return true
            case "false", "0": return false
            default: return nil
            }
        }
    }

    /// Manga field values indicating whether the book is a manga and its
    /// reading direction.
    ///
    /// See: https://anansi-project.github.io/docs/comicinfo/documentation#manga
    enum Manga: Hashable, Sendable {
        case unknown
        case no
        case yes
        case yesAndRightToLeft

        /// Case-insensitive initializer.
        ///
        /// Returns `nil` when `rawValue` is not a known manga value.
        init?(rawValue: String) {
            switch rawValue.lowercased() {
            case "unknown": self = .unknown
            case "no": self = .no
            case "yes": self = .yes
            case "yesandrighttoleft": self = .yesAndRightToLeft
            default: return nil
            }
        }
    }
}
| 351 | + |
private extension String {
    /// Splits a comma-separated list into its individual items.
    ///
    /// Each item is trimmed of surrounding whitespace and newlines, so a list
    /// wrapped over several lines in the XML still yields clean values. Items
    /// which are empty after trimming are dropped.
    ///
    /// - Returns: The trimmed, non-empty items, in their original order.
    func splitComma() -> [String] {
        split(separator: ",")
            .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
            .filter { !$0.isEmpty }
    }
}
0 commit comments