Skip to content

Commit 113c138

Browse files
elfkuzcoaudiodude
authored andcommitted
refactor: store logs as Redis hashes
1 parent 3bc5e88 commit 113c138

13 files changed

Lines changed: 187 additions & 138 deletions

File tree

wp1/base_db_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
from wp1.environment import Environment
99
from wp1.models.wp10.selection import Selection
1010

11+
from wp1.redis_db import connect as redis_connect
12+
13+
from wp1.models.wp10.rating import Rating
14+
1115
logger = logging.getLogger(__name__)
1216

1317
try:
@@ -93,10 +97,28 @@ def _setup_wp_one_db(self):
9397
cursor.execute(stmt)
9498
self.wp10db.commit()
9599

100+
def connect_redis_db(self):
101+
if ENV != Environment.TEST:
102+
raise ValueError(
103+
'Database tests destroy data! They should only be run in the TEST env'
104+
)
105+
return redis_connect()
106+
107+
def _setup_redis_db(self):
108+
self.redis = self.connect_redis_db()
109+
self.redis.ping()
110+
self.redis.flushdb()
111+
112+
def _cleanup_redis_db(self):
113+
self.redis.flushdb()
114+
96115
def setUp(self):
97116
self.addCleanup(self._cleanup_wp_one_db)
98117
self._setup_wp_one_db()
99118

119+
self.addCleanup(self._cleanup_redis_db)
120+
self._setup_redis_db()
121+
100122

101123
class BaseWikiDbTest(WpOneAssertions):
102124

@@ -144,6 +166,9 @@ def setUp(self):
144166
self.addCleanup(self._cleanup_wp_one_db)
145167
self._setup_wp_one_db()
146168

169+
self.addCleanup(self._cleanup_redis_db)
170+
self._setup_redis_db()
171+
147172

148173
def get_first_selection(wp10db):
149174
with wp10db.cursor() as cursor:

wp1/logic/log.py

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,59 @@
1-
import attr
1+
import datetime
22

3+
import attr
4+
from redis import Redis
5+
from wp1.redis_db import gen_redis_log_key
36
from wp1.models.wp10.log import Log
47

8+
# Redis does not allow None types. However if a log to be stored has a None
9+
# we convert it to this value while storing on Redis and back to None
10+
# when converting from Redis to python object
11+
REDIS_NULL = b"__redis__none__"
12+
13+
14+
def insert_or_update(redis: Redis, log: Log):
15+
log_key = gen_redis_log_key(project=log.l_project,
16+
namespace=log.l_namespace,
17+
action=log.l_action,
18+
article=log.l_article)
19+
with redis.pipeline() as pipe:
20+
mapping = {
21+
k: REDIS_NULL if v is None else v for k, v in attr.asdict(log).items()
22+
}
23+
pipe.hset(log_key, mapping=mapping)
24+
pipe.expire(log_key, datetime.timedelta(days=7))
25+
pipe.execute()
26+
27+
28+
def get_logs(
29+
redis: Redis,
30+
*,
31+
project: str | bytes = "*",
32+
namespace: str | bytes = "*",
33+
action: str | bytes = "*",
34+
article: str | bytes = "*",
35+
start_dt: datetime.datetime | None = None,
36+
) -> list[Log]:
37+
"""Retrieve logs from Redis matching the given filters."""
38+
key = gen_redis_log_key(project=project,
39+
namespace=namespace,
40+
action=action,
41+
article=article)
42+
logs: list[Log] = []
43+
for log_key in redis.scan_iter(match=key, _type="HASH"):
44+
data = redis.hgetall(log_key)
45+
# convert the data according to the field types of the Log object
46+
log_dict = {
47+
k.decode("utf-8"): v if v != REDIS_NULL else None
48+
for k, v in data.items()
49+
}
50+
if log_dict["l_namespace"] is not None:
51+
log_dict["l_namespace"] = int(log_dict["l_namespace"])
52+
53+
log = Log(**log_dict)
54+
# skip logs that are not newer than start_dt
55+
if start_dt is not None and log.timestamp_dt < start_dt:
56+
continue
57+
logs.append(log)
558

6-
def insert_or_update(wp10db, log):
7-
with wp10db.cursor() as cursor:
8-
cursor.execute(
9-
'''
10-
INSERT INTO logging
11-
(l_project, l_namespace, l_article, l_action, l_timestamp, l_old,
12-
l_new, l_revision_timestamp)
13-
VALUES
14-
(%(l_project)s, %(l_namespace)s, %(l_article)s, %(l_action)s,
15-
%(l_timestamp)s, %(l_old)s, %(l_new)s, %(l_revision_timestamp)s)
16-
ON DUPLICATE KEY UPDATE l_article = l_article
17-
''', attr.asdict(log))
59+
return logs

wp1/logic/page.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def get_pages_by_category(wikidb, category, ns=None):
3737
yield Page(**result)
3838

3939

40-
def update_page_moved(wp10db, project, old_ns, old_title, new_ns, new_title,
41-
move_timestamp_dt):
40+
def update_page_moved(wp10db, redis, project, old_ns, old_title, new_ns,
41+
new_title, move_timestamp_dt):
4242
logger.debug('Updating moves table for %s -> %s', old_title.decode('utf-8'),
4343
new_title.decode('utf-8'))
4444
db_timestamp = move_timestamp_dt.strftime(TS_FORMAT).encode('utf-8')
@@ -62,7 +62,7 @@ def update_page_moved(wp10db, project, old_ns, old_title, new_ns, new_title,
6262
l_old=b'',
6363
l_new=b'',
6464
l_revision_timestamp=db_timestamp)
65-
logic_log.insert_or_update(wp10db, new_log)
65+
logic_log.insert_or_update(redis, new_log)
6666

6767

6868
def _get_redirects_from_db(wikidb, namespace, title, timestamp_dt):

wp1/logic/page_test.py

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from wp1.constants import TS_FORMAT
99
from wp1.logic import page as logic_page
1010
from wp1.logic import project as logic_project
11+
from wp1.logic import log as logic_log
1112
from wp1.models.wp10.log import Log
1213
from wp1.models.wp10.move import Move
1314
from wp1.models.wp10.namespace import Namespace, NsType
@@ -21,10 +22,8 @@ def get_all_moves(wp10db):
2122
return [Move(**db_move) for db_move in cursor.fetchall()]
2223

2324

24-
def get_all_logs(wp10db):
25-
with wp10db.cursor() as cursor:
26-
cursor.execute('SELECT * FROM ' + Log.table_name)
27-
return [Log(**db_log) for db_log in cursor.fetchall()]
25+
def get_all_logs(redis):
26+
return logic_log.get_logs(redis)
2827

2928

3029
class LogicPageCategoryTest(BaseWikiDbTest):
@@ -251,8 +250,8 @@ def setUp(self):
251250
self.timestamp_db = self.dt.strftime(TS_FORMAT).encode('utf-8')
252251

253252
def test_new_move(self):
254-
logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
255-
self.old_article, self.new_ns,
253+
logic_page.update_page_moved(self.wp10db, self.redis, self.project,
254+
self.old_ns, self.old_article, self.new_ns,
256255
self.new_article, self.dt)
257256

258257
with self.wp10db.cursor() as cursor:
@@ -271,17 +270,13 @@ def test_new_move(self):
271270
self.assertEqual(self.timestamp_db, move.m_timestamp)
272271

273272
def test_new_move_log(self):
274-
logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
275-
self.old_article, self.new_ns,
273+
logic_page.update_page_moved(self.wp10db, self.redis, self.project,
274+
self.old_ns, self.old_article, self.new_ns,
276275
self.new_article, self.dt)
277276

278-
with self.wp10db.cursor() as cursor:
279-
cursor.execute(
280-
'''
281-
SELECT * FROM logging
282-
WHERE l_article = %(old_article)s
283-
''', {'old_article': self.old_article})
284-
log = Log(**cursor.fetchone())
277+
logs = logic_log.get_logs(self.redis, article=self.old_article)
278+
self.assertEqual(len(logs), 1)
279+
log = logs[0]
285280

286281
self.assertIsNotNone(log)
287282
self.assertEqual(self.old_ns, log.l_namespace)
@@ -292,25 +287,25 @@ def test_new_move_log(self):
292287
self.assertEqual(self.timestamp_db, log.l_revision_timestamp)
293288

294289
def test_does_not_add_existing_move(self):
295-
logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
296-
self.old_article, self.new_ns,
290+
logic_page.update_page_moved(self.wp10db, self.redis, self.project,
291+
self.old_ns, self.old_article, self.new_ns,
297292
self.new_article, self.dt)
298293

299-
logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
300-
self.old_article, self.new_ns,
294+
logic_page.update_page_moved(self.wp10db, self.redis, self.project,
295+
self.old_ns, self.old_article, self.new_ns,
301296
self.new_article, self.dt)
302297

303298
all_moves = get_all_moves(self.wp10db)
304299
self.assertEqual(1, len(all_moves))
305300

306301
def test_does_not_add_existing_log(self):
307-
logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
308-
self.old_article, self.new_ns,
302+
logic_page.update_page_moved(self.wp10db, self.redis, self.project,
303+
self.old_ns, self.old_article, self.new_ns,
309304
self.new_article, self.dt)
310305

311-
logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
312-
self.old_article, self.new_ns,
306+
logic_page.update_page_moved(self.wp10db, self.redis, self.project,
307+
self.old_ns, self.old_article, self.new_ns,
313308
self.new_article, self.dt)
314309

315-
all_logs = get_all_logs(self.wp10db)
310+
all_logs = get_all_logs(self.redis)
316311
self.assertEqual(1, len(all_logs))

wp1/logic/project.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,11 @@ def update_project_by_name(project_name, track_progress=False):
7575
if not project:
7676
project = Project(p_project=project_name,
7777
p_timestamp=GLOBAL_TIMESTAMP_WIKI)
78+
7879
update_project(wikidb,
7980
wp10db,
81+
redis,
8082
project,
81-
redis=redis,
8283
track_progress=track_progress)
8384

8485
if track_progress:
@@ -338,9 +339,9 @@ def increment_progress_count(redis, project_name):
338339

339340
def update_project_assessments(wikidb,
340341
wp10db,
342+
redis,
341343
project,
342344
extra_assessments,
343-
redis=None,
344345
track_progress=False):
345346
old_ratings = {}
346347
for rating in logic_rating.get_project_ratings(wp10db, project.p_project):
@@ -365,9 +366,10 @@ def update_project_assessments(wikidb,
365366
seen,
366367
redis=redis,
367368
track_progress=track_progress)
368-
store_new_ratings(wp10db, new_ratings, old_ratings, rating_to_category)
369+
store_new_ratings(wp10db, redis, new_ratings, old_ratings,
370+
rating_to_category)
369371

370-
process_unseen_articles(wikidb, wp10db, project, old_ratings, seen)
372+
process_unseen_articles(wikidb, wp10db, redis, project, old_ratings, seen)
371373

372374

373375
def update_project_assessments_by_kind(wikidb,
@@ -377,7 +379,7 @@ def update_project_assessments_by_kind(wikidb,
377379
kind,
378380
old_ratings,
379381
seen,
380-
redis=None,
382+
redis,
381383
track_progress=False):
382384
if kind not in (AssessmentKind.QUALITY, AssessmentKind.IMPORTANCE):
383385
raise ValueError('Parameter "kind" was not one of QUALITY or IMPORTANCE')
@@ -441,7 +443,8 @@ def update_project_assessments_by_kind(wikidb,
441443
return (new_ratings, rating_to_category)
442444

443445

444-
def store_new_ratings(wp10db, new_ratings, old_ratings, rating_to_category):
446+
def store_new_ratings(wp10db, redis, new_ratings, old_ratings,
447+
rating_to_category):
445448

446449
def sort_rating_tuples(rating_tuple):
447450
rating, kind, _ = rating_tuple
@@ -462,10 +465,10 @@ def sort_rating_tuples(rating_tuple):
462465

463466
if article_ref not in old_ratings or rating_changed:
464467
logic_rating.insert_or_update(wp10db, rating, kind)
465-
logic_rating.add_log_for_rating(wp10db, rating, kind, old_rating_value)
468+
logic_rating.add_log_for_rating(redis, rating, kind, old_rating_value)
466469

467470

468-
def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
471+
def process_unseen_articles(wikidb, wp10db, redis, project, old_ratings, seen):
469472
denom = len(old_ratings.keys())
470473
ratio = len(seen) / denom if denom != 0 else 'NaN'
471474

@@ -499,7 +502,7 @@ def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
499502
move_data = logic_page.get_move_data(wp10db, wikidb, ns, title,
500503
project.timestamp_dt)
501504
if move_data is not None:
502-
logic_page.update_page_moved(wp10db, project, ns, title,
505+
logic_page.update_page_moved(wp10db, redis, project, ns, title,
503506
move_data['dest_ns'],
504507
move_data['dest_title'],
505508
move_data['timestamp_dt'])
@@ -529,10 +532,10 @@ def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
529532
logic_rating.insert_or_update(wp10db, rating, kind)
530533

531534
if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
532-
logic_rating.add_log_for_rating(wp10db, rating, AssessmentKind.QUALITY,
535+
logic_rating.add_log_for_rating(redis, rating, AssessmentKind.QUALITY,
533536
old_rating.r_quality)
534537
if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
535-
logic_rating.add_log_for_rating(wp10db, rating, AssessmentKind.IMPORTANCE,
538+
logic_rating.add_log_for_rating(redis, rating, AssessmentKind.IMPORTANCE,
536539
old_rating.r_importance)
537540

538541
n += 1
@@ -609,14 +612,14 @@ def update_project_record(wp10db, project, metadata):
609612
insert_or_update(wp10db, project)
610613

611614

612-
def update_project(wikidb, wp10db, project, redis=None, track_progress=False):
615+
def update_project(wikidb, wp10db, redis, project, track_progress=False):
613616
extra_assessments = api_project.get_extra_assessments(project.p_project)
614617

615618
update_project_assessments(wikidb,
616619
wp10db,
620+
redis,
617621
project,
618622
extra_assessments,
619-
redis=redis,
620623
track_progress=track_progress)
621624

622625
cleanup_project(wp10db, project)

0 commit comments

Comments
 (0)