|
51 | 51 | "Spatial Partition"
|
52 | 52 | ]
|
53 | 53 |
|
# URLs for hyperlinks to chapters. The order is significant: an entry's index
# in this list + 1 is that chapter's number in the table of contents.
CHAPTER_HREFS = [
    "introduction.html",
    "architecture-performance-and-games.html",
    "command.html",
    "flyweight.html",
    "observer.html",
    "prototype.html",
    "singleton.html",
    "state.html",
    "double-buffer.html",
    "game-loop.html",
    "update-method.html",
    "bytecode.html",
    "subclass-sandbox.html",
    "type-object.html",
    "component.html",
    "event-queue.html",
    "service-locator.html",
    "data-locality.html",
    "dirty-flag.html",
    "object-pool.html",
    "spatial-partition.html",
]
| 79 | + |
54 | 80 | num_chapters = 0
|
55 | 81 | empty_chapters = 0
|
56 | 82 | total_words = 0
|
@@ -121,7 +147,7 @@ def format_file(path, nav, skip_up_to_date):
|
121 | 147 | else:
|
122 | 148 | print "UNKNOWN COMMAND:", command, args
|
123 | 149 |
|
124 |
| - elif stripped.startswith('#'): |
| 150 | + elif extension != "xml" and stripped.startswith('#'): |
125 | 151 | # Build the page navigation from the headers.
|
126 | 152 | index = stripped.find(" ")
|
127 | 153 | headertype = stripped[:index]
|
@@ -230,10 +256,24 @@ def clean_up_code_xml(code):
|
230 | 256 |
|
231 | 257 | return code
|
232 | 258 |
|
def fix_link(match):
    """Rewrite one matched ``<a ...>text</a>`` element for the XML output.

    Written to be used as the replacement callback of ``re.sub``.

    Args:
        match: A regex match whose group 1 is the attribute text of the
            opening ``<a>`` tag and whose group 2 is the link's contents.

    Returns:
        The link contents followed by a ``<chap-ref>`` element holding the
        1-based chapter number when the link's href is listed in
        CHAPTER_HREFS; otherwise just the link contents, with the link
        itself stripped out.
    """
    tag = match.group(1)
    contents = match.group(2)

    # Not every <a> tag carries a double-quoted href (e.g. named anchors).
    # re.search() returns None for those, so treat them like any other
    # non-chapter link instead of failing on None.group().
    href_match = re.search(r'href\s*=\s*"([^"]+)"', tag)
    if href_match is None:
        return contents

    # If it's not a link to a chapter, just return the contents of the link
    # and strip out the link itself. list.index() raises ValueError for an
    # unknown href, which doubles as the membership test.
    try:
        chapter_number = CHAPTER_HREFS.index(href_match.group(1)) + 1
    except ValueError:
        return contents

    # Turn it into a chapter number reference.
    return "{}<chap-ref> ({})</chap-ref>".format(contents, chapter_number)
233 | 273 | def clean_up_xhtml(html):
|
234 |
| - # Remove links. |
235 |
| - html = re.sub(r"<a\s[^>]+>", "", html) |
236 |
| - html = re.sub(r"</a>", "", html) |
| 274 | + # Replace chapter links with chapter number references and remove other |
| 275 | + # links. |
| 276 | + html = re.sub(r"<a\s+([^>]+)>([^<]+)</a>", fix_link, html) |
237 | 277 |
|
238 | 278 | # Ditch newlines in the middle of blocks of text. Out of sheer malice,
|
239 | 279 | # even though they are meaningless in actual XML, InDesign treats them
|
|
0 commit comments