-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathgen_notes.py
More file actions
266 lines (227 loc) · 9.78 KB
/
gen_notes.py
File metadata and controls
266 lines (227 loc) · 9.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
from itertools import zip_longest
import re
from typing import Any, Callable, Dict, Iterable, List, Optional, TYPE_CHECKING
if TYPE_CHECKING:
from anki.notes import Note
class PoemLine:
    """
    Abstract node in the linked chain of lines that makes up a poem.

    Each node carries a predecessor, a successor (None until one is
    attached), and a sequence number. Subclasses provide the actual text by
    implementing _get_context(), _get_text(), and _get_prompt().
    """
    def __init__(self) -> None:
        self.predecessor = self  # so it's the right type...
        self.successor: Optional['PoemLine'] = None
        self.seq = -1

    def populate_note(self, note: 'Note', title: str, author: str, tags: List[str],
                      context_lines: int, recite_lines: int, deck_id: int) -> None:
        """
        Fill the _note_ with content testing on the current line.

        Sets the deck id on the note's model, the tags, and the Title,
        Author, Sequence, Context, and Line fields. The Prompt field is only
        written when _get_prompt() returns something other than None.
        """
        model = note.model()
        model['did'] = deck_id  # type: ignore
        note.tags = tags

        simple_fields = (
            ('Title', title),
            ('Author', author),
            ('Sequence', str(self.seq)),
        )
        for field_name, field_value in simple_fields:
            note[field_name] = field_value

        note['Context'] = self._format_context(context_lines)
        note['Line'] = self._format_text(recite_lines)

        prompt = self._get_prompt(recite_lines)
        if prompt is None:
            # Leave the field alone so the template default is used.
            return
        note['Prompt'] = prompt

    def _format_context(self, context_lines: int):
        """Return the context lines as HTML, one <p> element per line."""
        paragraphs = [f"<p>{entry}</p>" for entry in self._get_context(context_lines)]
        return ''.join(paragraphs)

    def _format_text(self, recitation_lines: int):
        """Return the recitation lines as HTML, one <p> element per line."""
        paragraphs = [f"<p>{entry}</p>" for entry in self._get_text(recitation_lines)]
        return ''.join(paragraphs)

    def _get_context(self, _lines: int, _recursing=False) -> List[str]:
        """
        Return the list of lines shown as context on the question side.
        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def _get_text(self, _lines: int) -> List[str]:
        """
        Return the list of lines to recite, starting with the current line
        and continuing into its successors. Must be implemented by
        subclasses.
        """
        raise NotImplementedError

    def _get_prompt(self, configured_recitation_lines: int) -> Optional[str]:
        """
        Return the prompt string displayed on the question side after the
        context lines, or None to fall back on the template default of
        [...]. At present this tells the user how many lines to recite, but
        it could plausibly carry other information in the future. Must be
        implemented by subclasses.
        """
        raise NotImplementedError
class Beginning(PoemLine):
    """
    Placeholder node that marks the start of the poem. It exists only so
    that its context and sequence number can be retrieved polymorphically,
    like those of any other node. Using it for anything else is an error.
    """
    def __init__(self):
        super().__init__()
        self.text = "[Beginning]"
        self.seq = 0

    def _get_context(self, _lines: int, _recursing=False) -> List[str]:
        # The chain of predecessors ends here, so the requested number of
        # lines is irrelevant: the marker text is all there is.
        marker_only = [self.text]
        return marker_only

    def _get_text(self, _lines: int) -> List[str]:
        """
        No recitation text is defined for the Beginning node: it is never a
        line we are asked to recite, so nobody needs to know what follows
        from its text property -- the first line we could ever be asked to
        recite is the line after it.
        """
        raise NotImplementedError

    def populate_note(self, note: 'Note', title: str, author: str, tags: List[str],
                      context_lines: int, recite_lines: int, deck_id: int) -> None:
        """Always raise AssertionError; this node never backs a note."""
        message = "The Beginning node cannot be used to populate a note."
        raise AssertionError(message)
class SingleLine(PoemLine):
    """
    One line of an ordinary (ungrouped) poem. It holds its text, a sequence
    number one greater than its predecessor's, a predecessor (possibly the
    Beginning node, but never None), and -- unless it is the last line of
    the poem -- a successor.
    """
    def __init__(self, text: str, predecessor: 'PoemLine') -> None:
        super().__init__()
        self.text = text
        self.predecessor = predecessor
        self.seq = self.predecessor.seq + 1

    def _get_context(self, lines: int, recursing=False) -> List[str]:
        # Countdown exhausted: this line is the earliest one to be shown.
        if lines == 0:
            return [self.text]

        earlier = self.predecessor._get_context(lines - 1, True)
        if recursing:
            # We are one of the predecessors being collected, so our own
            # text belongs in the result.
            return earlier + [self.text]
        # Outermost call: only the predecessors are returned, not our text.
        return earlier

    def _get_text(self, lines: int) -> List[str]:
        following = self.successor
        if lines == 1 or following is None:
            return [self.text]
        return [self.text] + following._get_text(lines - 1)

    def _get_prompt(self, configured_recitation_lines: int) -> Optional[str]:
        # Count the lines actually available from _this_ instance rather
        # than trusting the configured value: near the end of the poem
        # there may be fewer lines left to recite.
        available = len(self._get_text(configured_recitation_lines))
        return None if available == 1 else f"[...{available}]"
class GroupedLine(PoemLine):
    r"""
    A virtual "line" in a poem that has grouping set, so that multiple short
    lines can be treated as one line by LPCG. It consists of multiple text lines.

    The difference between grouped lines and ordinary lines with double the
    context and recitation values is that there is no overlapping. So this with
    default context and recitation values and a group of 2 yields only 3 notes,
    whereas a context of 4 and recitation of 2 would result in 6 notes:

        /A
        \B
        /C
        \D
        /E
        \F

    The lists returned by _get_context() and _get_text() are always fresh
    lists, never the internal text_lines list, so callers may modify them
    without affecting this node.
    """
    def __init__(self, text: List[str], predecessor: 'PoemLine') -> None:
        super().__init__()
        self.text_lines = text
        self.predecessor = predecessor
        self.seq = self.predecessor.seq + 1

    def _get_context(self, lines: int, recursing=False) -> List[str]:
        """
        Return the text lines of the preceding groups, oldest first. When
        called recursively (recursing=True) this group's own text lines are
        appended as well.
        """
        if lines == 0:
            # Copy so the caller never receives our internal list.
            return list(self.text_lines)

        earlier = self.predecessor._get_context(lines - 1, True)
        if not recursing:
            return earlier
        return earlier + self.text_lines

    def _get_text(self, lines: int) -> List[str]:
        """
        Return this group's text lines followed by those of up to
        (lines - 1) succeeding groups, stopping early at the end of the poem.
        """
        if lines == 1 or self.successor is None:
            # Copy so the caller never receives our internal list.
            return list(self.text_lines)
        return self.text_lines + self.successor._get_text(lines - 1)

    def _get_prompt(self, configured_recitation_lines: int) -> Optional[str]:
        """
        Return "[...N]" where N is the number of text lines to recite from
        this group onward, or None when exactly one text line is to be
        recited.
        """
        lines_to_recite = len(self._get_text(configured_recitation_lines))
        if lines_to_recite == 1:
            return None
        return f"[...{lines_to_recite}]"
def groups_of_n(iterable: Iterable, n: int) -> Iterable:
    """
    Split _iterable_ into consecutive tuples of _n_ items each:

    s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ...

    The final tuple is padded with None if the items run out early.

    Credit: https://stackoverflow.com/questions/5389507/iterating-over-every-two-elements-in-a-list
    """
    # Every slot refers to the *same* iterator, so each tuple that
    # zip_longest builds consumes n consecutive items from the input.
    shared = iter(iterable)
    slots = [shared] * n
    return zip_longest(*slots)
def _poemlines_from_textlines(text_lines: List[str], group_lines: int) -> List[PoemLine]:
    """
    Turn a list of cleansed text lines into a chain of linked PoemLine
    objects and return them in order. Each one can fill in a note testing
    itself via its populate_note() method.

    With group_lines == 1 every text line becomes a SingleLine; otherwise
    the text lines are bundled group_lines at a time into GroupedLine
    objects. The Beginning node that heads the chain is not part of the
    returned list, as it's not actually a line.
    """
    previous: PoemLine = Beginning()
    result: List[PoemLine] = []

    def _link(node: PoemLine) -> None:
        # Attach the new node after the current tail and advance the tail.
        nonlocal previous
        result.append(node)
        previous.successor = node
        previous = node

    if group_lines == 1:
        for text_line in text_lines:
            _link(SingleLine(text_line, previous))
    else:
        for chunk in groups_of_n(text_lines, group_lines):
            # The last chunk may be padded with None; drop the padding.
            members = [member for member in chunk if member is not None]
            _link(GroupedLine(members, previous))
    return result
def cleanse_text(string: str, config: Dict[str, Any]) -> List[str]:
    """
    Munge raw text from the poem editor into a list of lines that can be
    directly made into notes.

    Steps, in order:

    1. Drop whole-line comments (lines whose first non-whitespace character
       is "#"), whether or not they are indented.
    2. Mark lines that start with spaces or tabs followed by actual content
       with an <indent> placeholder, and strip surrounding whitespace.
       Whitespace-only lines count as blank lines, not as indented lines.
    3. Remove trailing "# ..." comments.
    4. Collapse runs of blank lines and remove blank lines at either end.
    5. Append config['endOfStanzaMarker'] to each line followed by a blank
       line and config['endOfTextMarker'] to the final line.
    6. Remove the blank lines and turn <indent> placeholders into
       <span class="indent">...</span>.

    Returns an empty list if no text remains after comments and blank lines
    are removed. Raises KeyError if _config_ lacks a needed marker key.
    """
    def _normalize_blank_lines(text_lines):
        # remove consecutive lone newlines
        new_text = []
        last_line = ""
        for line in text_lines:
            if last_line.strip() or line.strip():
                new_text.append(line)
            last_line = line
        # remove lone newlines at beginning and end; the emptiness checks
        # keep input with no real text from raising IndexError
        if new_text and not new_text[0].strip():
            del new_text[0]
        if new_text and not new_text[-1].strip():
            del new_text[-1]
        return new_text

    # remove whole-line comments, including indented ones, so they never
    # turn into empty "indented" lines further down
    text = [i for i in string.splitlines() if not i.lstrip().startswith("#")]
    # record a level of indentation if appropriate; the lookahead requires
    # real content after the whitespace so blank-looking lines stay blank
    text = [re.sub(r'^[ \t]+(?=\S)', r'<indent>', i).strip() for i in text]
    # remove trailing comments and normalize blank lines
    text = [re.sub(r'\s*\#.*$', '', i) for i in text]
    text = _normalize_blank_lines(text)

    # add end-of-stanza/poem markers where appropriate
    final_index = len(text) - 1
    for index, line in enumerate(text):
        if index == final_index:
            text[index] = line + config['endOfTextMarker']
        elif not text[index + 1].strip():
            text[index] = line + config['endOfStanzaMarker']

    # entirely remove all blank lines
    text = [i for i in text if i.strip()]
    # replace <indent>s with valid CSS
    text = [re.sub(r'^<indent>(.*)$', r'<span class="indent">\1</span>', i)
            for i in text]
    return text
def add_notes(col: Any, note_constructor: Callable,
              title: str, author: str, tags: List[str], text: List[str],
              deck_id: int, context_lines: int, group_lines: int,
              recite_lines: int):
    """
    Create one note per poem line (or per group of lines) from the given
    title, author, tags, and poem text, and add each to the collection.
    Return the number of notes added.

    Each note is built by calling note_constructor with the collection and
    the "LPCG 1.0" note type, then filled in by the poem line itself.

    A KeyError raised while filling in the fields is not caught here; this
    has been seen a couple of times when users accidentally edited the note
    type so that fields were missing. The caller should offer an
    appropriate error message in this case.
    """
    count = 0
    for poem_line in _poemlines_from_textlines(text, group_lines):
        note_type = col.models.byName("LPCG 1.0")
        new_note = note_constructor(col, note_type)
        poem_line.populate_note(new_note, title, author, tags,
                                context_lines, recite_lines, deck_id)
        col.addNote(new_note)
        count += 1
    return count