-
Notifications
You must be signed in to change notification settings - Fork 844
/
Copy pathreadtime.py
55 lines (37 loc) · 1.51 KB
/
readtime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import re
import math
from pelican import signals
from html.parser import HTMLParser #use html.parser for Python 3.6
# Assumed reading speed in words per minute; calculate_readtime divides the
# word count by this value to estimate the minutes needed to read a page.
# Reference: http://en.wikipedia.org/wiki/Words_per_minute
WPM = 230.0
class MLStripper(HTMLParser):
    """HTML parser that discards markup and collects only the text data.

    Feed it HTML with ``feed()`` and read the accumulated text back with
    ``get_data()``.  Every chunk of character data seen by the parser is
    appended to the ``fed`` list.
    """

    def __init__(self):
        # HTMLParser.__init__ already calls self.reset(), which (through the
        # override below) also creates the empty ``fed`` list, so no explicit
        # reset() call is needed here.
        super().__init__()

    def reset(self):
        """Reset the parser state and drop any text collected so far.

        Clearing ``fed`` here keeps a reused instance from returning text
        that belongs to a previously parsed document.
        """
        self.fed = []
        super().reset()

    def handle_data(self, d):
        """Record one chunk of text; called by the parser for each data run."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far, concatenated into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with all markup removed.

    Args:
        html: A string of HTML.

    Returns:
        The concatenated character data found in *html*.
    """
    s = MLStripper()
    s.feed(html)
    # feed() may hold back trailing text (for example text ending in an
    # unterminated "&", as in "AT&T") while waiting for more input; close()
    # tells the parser the input is complete so that text is delivered too.
    s.close()
    return s.get_data()
def calculate_readtime(content_object):
    """Attach an estimated reading time to a content object.

    Strips the HTML tags from ``content_object._content``, counts the words
    in the remaining text, and stores ``{"minutes": <int>}`` on
    ``content_object.readtime``.  The estimate is the word count divided by
    ``WPM``, rounded up, and never less than one minute.  Objects whose
    ``_content`` is ``None`` are left untouched.

    Args:
        content_object: Object exposing the page HTML as ``_content``.
    """
    content = content_object._content  # content HTML supplied by Pelican
    if content is None:
        return

    text = strip_tags(content)
    # Count runs of letters/digits instead of splitting on separators:
    # splitting yields empty strings at the text boundaries (inflating the
    # count), and an ASCII-only class breaks words at accented or non-Latin
    # characters.  [^\W_] is \w without the underscore, which keeps the
    # treatment of "_" as a word separator.
    num_words = len(re.findall(r"[^\W_]+", text))

    # Round up to whole minutes and enforce a one-minute minimum.
    minutes = max(1, int(math.ceil(num_words / WPM)))

    content_object.readtime = {
        "minutes": minutes,
    }
def register():
    """Hook ``calculate_readtime`` into the ``content_object_init`` signal."""
    init_signal = signals.content_object_init
    init_signal.connect(calculate_readtime)