diff --git a/wp1/base_db_test.py b/wp1/base_db_test.py index b7c850b0f..5b2e899c7 100644 --- a/wp1/base_db_test.py +++ b/wp1/base_db_test.py @@ -8,6 +8,10 @@ from wp1.environment import Environment from wp1.models.wp10.selection import Selection +from wp1.redis_db import connect as redis_connect + +from wp1.models.wp10.rating import Rating + logger = logging.getLogger(__name__) try: @@ -93,10 +97,28 @@ def _setup_wp_one_db(self): cursor.execute(stmt) self.wp10db.commit() + def connect_redis_db(self): + if ENV != Environment.TEST: + raise ValueError( + 'Database tests destroy data! They should only be run in the TEST env' + ) + return redis_connect() + + def _setup_redis_db(self): + self.redis = self.connect_redis_db() + self.redis.ping() + self.redis.flushdb() + + def _cleanup_redis_db(self): + self.redis.flushdb() + def setUp(self): self.addCleanup(self._cleanup_wp_one_db) self._setup_wp_one_db() + self.addCleanup(self._cleanup_redis_db) + self._setup_redis_db() + class BaseWikiDbTest(WpOneAssertions): @@ -144,6 +166,9 @@ def setUp(self): self.addCleanup(self._cleanup_wp_one_db) self._setup_wp_one_db() + self.addCleanup(self._cleanup_redis_db) + self._setup_redis_db() + def get_first_selection(wp10db): with wp10db.cursor() as cursor: diff --git a/wp1/logic/log.py b/wp1/logic/log.py index d20fd3df4..d3f79b19a 100644 --- a/wp1/logic/log.py +++ b/wp1/logic/log.py @@ -1,17 +1,59 @@ -import attr +import datetime +import attr +from redis import Redis +from wp1.redis_db import gen_redis_log_key from wp1.models.wp10.log import Log +# Redis does not allow None types. However if a log to be stored has a None +# we convert it to this value while storing on Redis and back to None +# when converting from Redis to python object +REDIS_NULL = b"__redis__none__" + + +def insert_or_update(redis: Redis, log: Log): + log_key = gen_redis_log_key(project=log.l_project, + namespace=log.l_namespace, + action=log.l_action, + article=log.l_article) + with redis.pipeline() as pipe: + mapping = { + k: REDIS_NULL if v is None else v for k, v in attr.asdict(log).items() + } + pipe.hset(log_key, mapping=mapping) + pipe.expire(log_key, datetime.timedelta(days=7)) + pipe.execute() + + +def get_logs( + redis: Redis, + *, + project: str | bytes = "*", + namespace: str | bytes = "*", + action: str | bytes = "*", + article: str | bytes = "*", + start_dt: datetime.datetime | None = None, +) -> list[Log]: + """Retrieve logs from Redis matching the given filters.""" + key = gen_redis_log_key(project=project, + namespace=namespace, + action=action, + article=article) + logs: list[Log] = [] + for log_key in redis.scan_iter(match=key, _type="HASH"): + data = redis.hgetall(log_key) + # convert the data according to the field types of the Log object + log_dict = { + k.decode("utf-8"): v if v != REDIS_NULL else None + for k, v in data.items() + } + if log_dict["l_namespace"] is not None: + log_dict["l_namespace"] = int(log_dict["l_namespace"]) + + log = Log(**log_dict) + # skip logs that are not newer than start_dt + if start_dt is not None and log.timestamp_dt < start_dt: + continue + logs.append(log) -def insert_or_update(wp10db, log): - with wp10db.cursor() as cursor: - cursor.execute( - ''' - INSERT INTO logging - (l_project, l_namespace, l_article, l_action, l_timestamp, l_old, - l_new, l_revision_timestamp) - VALUES - (%(l_project)s, %(l_namespace)s, %(l_article)s, %(l_action)s, - %(l_timestamp)s, %(l_old)s, %(l_new)s, %(l_revision_timestamp)s) - ON DUPLICATE KEY UPDATE l_article = l_article - ''', attr.asdict(log)) + return logs diff --git a/wp1/logic/page.py b/wp1/logic/page.py index 79e2e97f5..95a083927 100644 --- a/wp1/logic/page.py +++ b/wp1/logic/page.py @@ -37,8 +37,8 @@ def get_pages_by_category(wikidb, category, ns=None): yield Page(**result) -def update_page_moved(wp10db, project, old_ns, old_title, new_ns, new_title, - move_timestamp_dt): +def update_page_moved(wp10db, redis, project, old_ns, old_title, new_ns, + new_title, move_timestamp_dt): logger.debug('Updating moves table for %s -> %s', old_title.decode('utf-8'), new_title.decode('utf-8')) db_timestamp = move_timestamp_dt.strftime(TS_FORMAT).encode('utf-8') @@ -62,7 +62,7 @@ def update_page_moved(wp10db, project, old_ns, old_title, new_ns, new_title, l_old=b'', l_new=b'', l_revision_timestamp=db_timestamp) - logic_log.insert_or_update(wp10db, new_log) + logic_log.insert_or_update(redis, new_log) def _get_redirects_from_db(wikidb, namespace, title, timestamp_dt): diff --git a/wp1/logic/page_test.py b/wp1/logic/page_test.py index 5489b435f..bccb4b2a2 100644 --- a/wp1/logic/page_test.py +++ b/wp1/logic/page_test.py @@ -8,6 +8,7 @@ from wp1.constants import TS_FORMAT from wp1.logic import page as logic_page from wp1.logic import project as logic_project +from wp1.logic import log as logic_log from wp1.models.wp10.log import Log from wp1.models.wp10.move import Move from wp1.models.wp10.namespace import Namespace, NsType @@ -21,10 +22,8 @@ def get_all_moves(wp10db): return [Move(**db_move) for db_move in cursor.fetchall()] -def get_all_logs(wp10db): - with wp10db.cursor() as cursor: - cursor.execute('SELECT * FROM ' + Log.table_name) - return [Log(**db_log) for db_log in cursor.fetchall()] +def get_all_logs(redis): + return logic_log.get_logs(redis) class LogicPageCategoryTest(BaseWikiDbTest): @@ -251,8 +250,8 @@ def setUp(self): self.timestamp_db = self.dt.strftime(TS_FORMAT).encode('utf-8') def test_new_move(self): - logic_page.update_page_moved(self.wp10db, self.project, self.old_ns, - self.old_article, self.new_ns, + logic_page.update_page_moved(self.wp10db, self.redis, self.project, + self.old_ns, self.old_article, self.new_ns, self.new_article, self.dt) with self.wp10db.cursor() as cursor: @@ -271,17 +270,13 @@ def test_new_move(self): self.assertEqual(self.timestamp_db, move.m_timestamp) def test_new_move_log(self): - logic_page.update_page_moved(self.wp10db, self.project, self.old_ns, - self.old_article, self.new_ns, + logic_page.update_page_moved(self.wp10db, self.redis, self.project, + self.old_ns, self.old_article, self.new_ns, self.new_article, self.dt) - with self.wp10db.cursor() as cursor: - cursor.execute( - ''' - SELECT * FROM logging - WHERE l_article = %(old_article)s - ''', {'old_article': self.old_article}) - log = Log(**cursor.fetchone()) + logs = logic_log.get_logs(self.redis, article=self.old_article) + self.assertEqual(len(logs), 1) + log = logs[0] self.assertIsNotNone(log) self.assertEqual(self.old_ns, log.l_namespace) @@ -292,25 +287,25 @@ def test_new_move_log(self): self.assertEqual(self.timestamp_db, log.l_revision_timestamp) def test_does_not_add_existing_move(self): - logic_page.update_page_moved(self.wp10db, self.project, self.old_ns, - self.old_article, self.new_ns, + logic_page.update_page_moved(self.wp10db, self.redis, self.project, + self.old_ns, self.old_article, self.new_ns, self.new_article, self.dt) - logic_page.update_page_moved(self.wp10db, self.project, self.old_ns, - self.old_article, self.new_ns, + logic_page.update_page_moved(self.wp10db, self.redis, self.project, + self.old_ns, self.old_article, self.new_ns, self.new_article, self.dt) all_moves = get_all_moves(self.wp10db) self.assertEqual(1, len(all_moves)) def test_does_not_add_existing_log(self): - logic_page.update_page_moved(self.wp10db, self.project, self.old_ns, - self.old_article, self.new_ns, + logic_page.update_page_moved(self.wp10db, self.redis, self.project, + self.old_ns, self.old_article, self.new_ns, self.new_article, self.dt) - logic_page.update_page_moved(self.wp10db, self.project, self.old_ns, - self.old_article, self.new_ns, + logic_page.update_page_moved(self.wp10db, self.redis, self.project, + self.old_ns, self.old_article, self.new_ns, self.new_article, self.dt) - all_logs = get_all_logs(self.wp10db) + all_logs = get_all_logs(self.redis) self.assertEqual(1, len(all_logs)) diff --git a/wp1/logic/project.py b/wp1/logic/project.py index 81da178b2..7ec061854 100644 --- a/wp1/logic/project.py +++ b/wp1/logic/project.py @@ -75,10 +75,11 @@ def update_project_by_name(project_name, track_progress=False): if not project: project = Project(p_project=project_name, p_timestamp=GLOBAL_TIMESTAMP_WIKI) + update_project(wikidb, wp10db, + redis, project, - redis=redis, track_progress=track_progress) if track_progress: @@ -338,9 +339,9 @@ def increment_progress_count(redis, project_name): def update_project_assessments(wikidb, wp10db, + redis, project, extra_assessments, - redis=None, track_progress=False): old_ratings = {} for rating in logic_rating.get_project_ratings(wp10db, project.p_project): @@ -365,9 +366,10 @@ def update_project_assessments(wikidb, seen, redis=redis, track_progress=track_progress) - store_new_ratings(wp10db, new_ratings, old_ratings, rating_to_category) + store_new_ratings(wp10db, redis, new_ratings, old_ratings, + rating_to_category) - process_unseen_articles(wikidb, wp10db, project, old_ratings, seen) + process_unseen_articles(wikidb, wp10db, redis, project, old_ratings, seen) def update_project_assessments_by_kind(wikidb, @@ -377,7 +379,7 @@ def update_project_assessments_by_kind(wikidb, kind, old_ratings, seen, - redis=None, + redis, track_progress=False): if kind not in (AssessmentKind.QUALITY, AssessmentKind.IMPORTANCE): raise ValueError('Parameter "kind" was not one of QUALITY or IMPORTANCE') @@ -441,7 +443,8 @@ def update_project_assessments_by_kind(wikidb, return (new_ratings, rating_to_category) -def store_new_ratings(wp10db, new_ratings, old_ratings, rating_to_category): +def store_new_ratings(wp10db, redis, new_ratings, old_ratings, + rating_to_category): def sort_rating_tuples(rating_tuple): rating, kind, _ = rating_tuple @@ -462,10 +465,10 @@ def sort_rating_tuples(rating_tuple): if article_ref not in old_ratings or rating_changed: logic_rating.insert_or_update(wp10db, rating, kind) - logic_rating.add_log_for_rating(wp10db, rating, kind, old_rating_value) + logic_rating.add_log_for_rating(redis, rating, kind, old_rating_value) -def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen): +def process_unseen_articles(wikidb, wp10db, redis, project, old_ratings, seen): denom = len(old_ratings.keys()) ratio = len(seen) / denom if denom != 0 else 'NaN' @@ -499,7 +502,7 @@ def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen): move_data = logic_page.get_move_data(wp10db, wikidb, ns, title, project.timestamp_dt) if move_data is not None: - logic_page.update_page_moved(wp10db, project, ns, title, + logic_page.update_page_moved(wp10db, redis, project, ns, title, move_data['dest_ns'], move_data['dest_title'], move_data['timestamp_dt']) @@ -529,10 +532,10 @@ def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen): logic_rating.insert_or_update(wp10db, rating, kind) if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH): - logic_rating.add_log_for_rating(wp10db, rating, AssessmentKind.QUALITY, + logic_rating.add_log_for_rating(redis, rating, AssessmentKind.QUALITY, old_rating.r_quality) if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH): - logic_rating.add_log_for_rating(wp10db, rating, AssessmentKind.IMPORTANCE, + logic_rating.add_log_for_rating(redis, rating, AssessmentKind.IMPORTANCE, old_rating.r_importance) n += 1 @@ -609,14 +612,14 @@ def update_project_record(wp10db, project, metadata): insert_or_update(wp10db, project) -def update_project(wikidb, wp10db, project, redis=None, track_progress=False): +def update_project(wikidb, wp10db, redis, project, track_progress=False): extra_assessments = api_project.get_extra_assessments(project.p_project) update_project_assessments(wikidb, wp10db, + redis, project, extra_assessments, - redis=redis, track_progress=track_progress) cleanup_project(wp10db, project) diff --git a/wp1/logic/project_test.py b/wp1/logic/project_test.py index 54bde85f2..1370f281d 100644 --- a/wp1/logic/project_test.py +++ b/wp1/logic/project_test.py @@ -9,6 +9,7 @@ from wp1.conf import get_conf from wp1.constants import AssessmentKind, CATEGORY_NS_INT, GLOBAL_TIMESTAMP_WIKI, TS_FORMAT from wp1.logic import project as logic_project +from wp1.logic import log as logic_log from wp1.models.wiki.page import Page from wp1.models.wp10.category import Category from wp1.models.wp10.log import Log @@ -42,10 +43,8 @@ def _get_all_ratings(wp10db): return [Rating(**db_rating) for db_rating in cursor.fetchall()] -def _get_all_logs(wp10db): - with wp10db.cursor() as cursor: - cursor.execute('SELECT * FROM logging') - return [Log(**db_log) for db_log in cursor.fetchall()] +def _get_all_logs(redis): + return logic_log.get_logs(redis) def _get_all_global_article_scores(wp10db): @@ -582,7 +581,7 @@ def test_old_rating_same_quality(self): self._insert_ratings(self.quality_pages[6:], 0, AssessmentKind.QUALITY) logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -602,7 +601,7 @@ def test_old_rating_same_importance(self): AssessmentKind.IMPORTANCE) logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -624,7 +623,7 @@ def test_old_rating_update_quality(self): override_rating=NOT_A_CLASS.encode('utf-8')) logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -646,7 +645,7 @@ def test_old_rating_update_importance(self): override_rating=NOT_A_CLASS.encode('utf-8')) logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -669,7 +668,7 @@ def test_old_rating_update_both(self): override_rating=NOT_A_CLASS.encode('utf-8')) logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -692,7 +691,7 @@ def test_new_rating_quality(self): expected_global_ts = b'20190113000000' with patch('wp1.logic.rating.GLOBAL_TIMESTAMP', expected_global_ts): logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -706,7 +705,7 @@ def test_new_rating_quality(self): for r in ratings: self.assertEqual(q_page_to_rating[r.r_article], r.r_quality) - logs = _get_all_logs(self.wp10db) + logs = _get_all_logs(self.redis) self.assertEqual(len(q_pages), len(logs)) actual_log_titles = set(l.l_article for l in logs) @@ -726,7 +725,7 @@ def test_new_rating_importance(self): expected_global_ts = b'20190113000000' with patch('wp1.logic.rating.GLOBAL_TIMESTAMP', expected_global_ts): logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -740,7 +739,7 @@ def test_new_rating_importance(self): for r in ratings: self.assertEqual(i_page_to_rating[r.r_article], r.r_importance) - logs = _get_all_logs(self.wp10db) + logs = _get_all_logs(self.redis) self.assertEqual(len(i_pages), len(logs)) actual_log_titles = set(l.l_article for l in logs) @@ -761,7 +760,7 @@ def test_new_rating_both(self): expected_global_ts = b'20190113000000' with patch('wp1.logic.rating.GLOBAL_TIMESTAMP', expected_global_ts): logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -782,7 +781,7 @@ def test_new_rating_both(self): for r in ratings: self.assertEqual(i_page_to_rating[r.r_article], r.r_importance) - logs = _get_all_logs(self.wp10db) + logs = _get_all_logs(self.redis) self.assertEqual(len(q_pages) + len(i_pages), len(logs)) def test_custom_rating(self): @@ -803,7 +802,7 @@ def test_custom_rating(self): expected_global_ts = b'20190113000000' with patch('wp1.logic.rating.GLOBAL_TIMESTAMP', expected_global_ts): logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, extra) + self.redis, self.project, extra) ratings = _get_all_ratings(self.wp10db) self.assertNotEqual(0, len(ratings)) @@ -824,7 +823,7 @@ def _do_assessment(self): expected_global_ts = b'20190113000000' with patch('wp1.logic.rating.GLOBAL_TIMESTAMP', expected_global_ts): logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) def _assert_updated_quality_ratings(self, assert_log_len=True): ratings = _get_all_ratings(self.wp10db) @@ -842,7 +841,7 @@ def _assert_updated_quality_ratings(self, assert_log_len=True): self.assertEqual(q_page_to_rating[r.r_article], r.r_quality) if assert_log_len: - logs = _get_all_logs(self.wp10db) + logs = _get_all_logs(self.redis) self.assertEqual(len(q_pages), len(logs)) def _assert_updated_importance_ratings(self, assert_log_len=True): @@ -861,7 +860,7 @@ def _assert_updated_importance_ratings(self, assert_log_len=True): self.assertEqual(i_page_to_rating[r.r_article], r.r_importance) if assert_log_len: - logs = _get_all_logs(self.wp10db) + logs = _get_all_logs(self.redis) self.assertEqual(len(i_pages), len(logs)) def test_multiple_new_quality(self): @@ -918,7 +917,7 @@ def fake_api(*args, **kwargs): patched_site.api.side_effect = fake_api logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) self.assertEqual(2, len(patched_site.api.call_args_list)) @@ -951,7 +950,7 @@ def fake_api(*args, **kwargs): patched_site.api.side_effect = fake_api logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) self.assertEqual(2, len(patched_site.api.call_args_list)) @@ -986,7 +985,7 @@ def fake_api(*args, **kwargs): patched_site.api.side_effect = fake_api logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) patched_site.assert_not_called() @@ -1006,7 +1005,7 @@ def fake_api(*args, **kwargs): patched_site.api.side_effect = fake_api logic_project.update_project_assessments(self.wikidb, self.wp10db, - self.project, {}) + self.redis, self.project, {}) patched_site.assert_not_called() @@ -1022,7 +1021,8 @@ def setUp(self): self._insert_global_scores() - @patch('wp1.logic.project.api_project.get_extra_assessments', return_value={'extra': {}}) + @patch('wp1.logic.project.api_project.get_extra_assessments', + return_value={'extra': {}}) def test_update_global_articles_table(self, mock_api_project): expected = [{ 'a_article': b'Art of testing', @@ -1116,7 +1116,8 @@ def test_update_global_articles_table(self, mock_api_project): 'a_score': 35 }] - logic_project.update_project(self.wikidb, self.wp10db, self.project) + logic_project.update_project(self.wikidb, self.wp10db, self.redis, + self.project) logic_project.update_global_articles_for_project_name( self.wp10db, self.project.p_project) @@ -1431,13 +1432,12 @@ def setUp(self): self._insert_pages(self.importance_pages) self._insert_ratings(zip(self.quality_pages[6:], self.importance_pages[4:]), 0, 'both') - self.redis = fakeredis.FakeStrictRedis() def test_initial_work_count(self): logic_project.update_project_assessments(self.wikidb, self.wp10db, + self.redis, self.project, {}, - redis=self.redis, track_progress=True) actual = self.redis.hget(b'progress:%s' % self.project.p_project, 'work') self.assertEqual(b'34', actual) @@ -1445,8 +1445,8 @@ def test_initial_work_count(self): def test_final_progress(self): logic_project.update_project_assessments(self.wikidb, self.wp10db, + self.redis, self.project, {}, - redis=self.redis, track_progress=True) actual = self.redis.hget(b'progress:%s' % self.project.p_project, 'progress') diff --git a/wp1/logic/rating.py b/wp1/logic/rating.py index 422dc0969..137effbb1 100644 --- a/wp1/logic/rating.py +++ b/wp1/logic/rating.py @@ -341,7 +341,7 @@ def count_unassessed_importance_for_project(wp10db, project): return cursor.fetchone()['cnt'] -def add_log_for_rating(wp10db, new_rating, kind, old_rating_value): +def add_log_for_rating(redis, new_rating, kind, old_rating_value): if kind == AssessmentKind.QUALITY: action = b'quality' timestamp = new_rating.r_quality_timestamp @@ -361,4 +361,4 @@ def add_log_for_rating(wp10db, new_rating, kind, old_rating_value): l_old=old_rating_value, l_new=new, l_revision_timestamp=timestamp) - logic_log.insert_or_update(wp10db, log) + logic_log.insert_or_update(redis, log) diff --git a/wp1/logic/rating_test.py b/wp1/logic/rating_test.py index 4d665e4e6..54c548987 100644 --- a/wp1/logic/rating_test.py +++ b/wp1/logic/rating_test.py @@ -1,6 +1,7 @@ from wp1.base_db_test import BaseWpOneDbTest from wp1.constants import AssessmentKind from wp1.logic import rating as logic_rating +from wp1.logic import log as logic_log from wp1.models.wp10.log import Log from wp1.models.wp10.rating import Rating @@ -13,18 +14,12 @@ def test_add_log_for_quality_rating(self): r_article=b'Testing Stuff', r_quality=b'GA-Class', r_quality_timestamp=b'2018-04-01T12:30:00Z') - logic_rating.add_log_for_rating(self.wp10db, rating, AssessmentKind.QUALITY, + logic_rating.add_log_for_rating(self.redis, rating, AssessmentKind.QUALITY, b'NotA-Class') - with self.wp10db.cursor() as cursor: - cursor.execute( - ''' - SELECT * FROM ''' + Log.table_name + ''' - WHERE l_article = %s - ''', (b'Testing Stuff',)) - db_log = cursor.fetchone() - self.assertIsNotNone(db_log) - log = Log(**db_log) + logs = logic_log.get_logs(self.redis, article=b'Testing Stuff') + self.assertEqual(len(logs), 1) + log = logs[0] self.assertEqual(b'Test Project', log.l_project) self.assertEqual(0, log.l_namespace) self.assertEqual(b'Testing Stuff', log.l_article) @@ -38,18 +33,12 @@ def test_add_log_for_importance_rating(self): r_article=b'Testing Stuff', r_importance=b'Mid-Class', r_importance_timestamp=b'2018-04-01T12:30:00Z') - logic_rating.add_log_for_rating(self.wp10db, rating, + logic_rating.add_log_for_rating(self.redis, rating, AssessmentKind.IMPORTANCE, b'NotA-Class') - with self.wp10db.cursor() as cursor: - cursor.execute( - ''' - SELECT * FROM ''' + Log.table_name + ''' - WHERE l_article = %s - ''', (b'Testing Stuff',)) - db_log = cursor.fetchone() - self.assertIsNotNone(db_log) - log = Log(**db_log) + logs = logic_log.get_logs(self.redis, article=b"Testing Stuff") + self.assertEqual(len(logs), 1) + log = logs[0] self.assertEqual(b'Test Project', log.l_project) self.assertEqual(0, log.l_namespace) self.assertEqual(b'Testing Stuff', log.l_article) diff --git a/wp1/logs.py b/wp1/logs.py index 36b19b33a..c9a51fddb 100644 --- a/wp1/logs.py +++ b/wp1/logs.py @@ -5,8 +5,10 @@ from wp1 import api from wp1.conf import get_conf +from wp1.redis_db import connect as redis_connect from wp1.constants import LOG_NS, LOG_DATE_FORMAT, TS_FORMAT, TS_FORMAT_WP10, MAX_LOGS_PER_DAY from wp1.logic.util import int_to_ns +from wp1.logic import log as logic_log from wp1.models.wp10.log import Log from wp1.templates import env as jinja_env from wp1.time import get_current_datetime @@ -26,15 +28,6 @@ def log_page_name(project_name): project_name.decode('utf-8')) -def get_logs(wp10db, project_name, start_dt): - wp10db.ping() - with wp10db.cursor() as cursor: - cursor.execute( - 'SELECT * FROM logging WHERE l_project = %s AND l_timestamp > %s', - (project_name, start_dt.strftime(TS_FORMAT_WP10))) - return [Log(**db_log) for db_log in cursor.fetchall()] - - def move_target(wp10db, ns, article, db_timestamp): wp10db.ping() with wp10db.cursor() as cursor: @@ -73,7 +66,7 @@ def talk_page_for_article(wp10db, name, namespace): name.decode('utf-8')) -def calculate_logs_to_update(wikidb, wp10db, project_name, from_dt=None): +def calculate_logs_to_update(redis, project_name, from_dt=None): """ Return a dictionary of datetime -> list of log objects that should be uploaded to Wikipedia. If from_dt is given, the logs are calculated based on @@ -84,7 +77,7 @@ def calculate_logs_to_update(wikidb, wp10db, project_name, from_dt=None): from_dt.replace(hour=23, minute=59, second=59) dt_to_log = defaultdict(list) - for log in get_logs(wp10db, project_name, from_dt): + for log in logic_log.get_logs(redis, project=project_name, start_dt=from_dt): dt_to_log[log.timestamp_dt.date()].append(log) return dt_to_log @@ -199,10 +192,11 @@ def generate_log_edits(wikidb, wp10db, project_name, log_map): def update_log_page_for_project(project_name): wikidb = wiki_connect() wp10db = wp10_connect() + redis = redis_connect() logging.basicConfig(level=logging.INFO) try: - log_map = calculate_logs_to_update(wikidb, wp10db, project_name) + log_map = calculate_logs_to_update(redis, project_name) edits = generate_log_edits(wikidb, wp10db, project_name, log_map) p = api.get_page(log_page_name(project_name)) diff --git a/wp1/logs_test.py b/wp1/logs_test.py index 34a04e047..94f831c4e 100644 --- a/wp1/logs_test.py +++ b/wp1/logs_test.py @@ -5,6 +5,7 @@ import attr from wp1 import logs +from wp1.logic import log as logic_log from wp1.base_db_test import BaseCombinedDbTest from wp1.models.wp10.log import Log @@ -117,17 +118,8 @@ def _move_logs(self): return logs def _insert_logs(self, logs=None): - with self.wp10db.cursor() as cursor: - cursor.executemany( - ''' - INSERT INTO logging - (l_project, l_namespace, l_article, l_old, l_new, l_action, - l_timestamp, l_revision_timestamp) - VALUES - (%(l_project)s, %(l_namespace)s, %(l_article)s, %(l_old)s, %(l_new)s, - %(l_action)s, %(l_timestamp)s, %(l_revision_timestamp)s) - ''', [attr.asdict(log) for log in logs]) - self.wp10db.commit() + for log in logs: + logic_log.insert_or_update(self.redis, log) def _insert_moves(self): move_data = [(0, m[0], 0, m[1], @@ -245,7 +237,9 @@ def test_get_logs(self): b'20181226101010', b'NotA-Class', b'Category-Class', b'2018-12-26T05:10:10Z') ] - actual = logs.get_logs(self.wp10db, self.project, datetime(2018, 11, 24)) + actual = logic_log.get_logs(self.redis, + project=self.project, + start_dt=datetime(2018, 11, 24)) actual = list(attr.astuple(a) for a in actual) self.assertEqual(sorted(expected), sorted(actual)) @@ -289,8 +283,7 @@ def test_talk_page_for_article_category(self): @patch('wp1.logs.get_current_datetime', return_value=datetime(2018, 12, 28, 12)) def test_calculate_logs_to_update_keys(self, patched_datetime_now): - actual = logs.calculate_logs_to_update(self.wikidb, self.wp10db, - self.project) + actual = logs.calculate_logs_to_update(self.redis, self.project) self.assertEqual(3, len(actual)) self.assertTrue(datetime(2018, 12, 25).date() in actual) self.assertTrue(datetime(2018, 12, 26).date() in actual) @@ -305,8 +298,7 @@ def test_calculate_logs_to_update_values(self, patched_current_datetime): ] expected.extend(l for l in self._move_logs() if l.l_revision_timestamp.startswith(b'2018-12-25')) - actual = logs.calculate_logs_to_update(self.wikidb, self.wp10db, - self.project) + actual = logs.calculate_logs_to_update(self.redis, self.project) for d in ((25, b'2018-12-25'), (26, b'2018-12-26'), (27, b'2018-12-27')): expected = [ @@ -564,15 +556,17 @@ def test_generate_log_edits(self): self.assertTrue(actual[1].startswith('=== December 26, 2018 ===')) self.assertTrue(actual[2].startswith('=== December 25, 2018 ===')) + @patch('wp1.logs.redis_connect') @patch('wp1.logs.wiki_connect') @patch('wp1.logs.wp10_connect') @patch('wp1.logs.api') def test_upload_log_page_for_project(self, patched_api, patched_wp10, - patched_wiki): + patched_wiki, patched_redis): logs.update_log_page_for_project(b'Catholicism') call = patched_api.save_page.call_args[0] self.assertEqual('Update logs for past 7 days', call[2]) + @patch('wp1.logs.redis_connect') @patch('wp1.logs.wiki_connect') @patch('wp1.logs.wp10_connect') @patch('wp1.logs.api') @@ -580,7 +574,7 @@ def test_upload_log_page_for_project(self, patched_api, patched_wp10, return_value=datetime(2018, 12, 28, 12)) def test_upload_log_page_for_project_no_logs(self, patched_datetime, patched_api, patched_wp10, - patched_wiki): + patched_wiki, patched_redis): project_name = b'Catholicism' header = '{{Log}}\n' no_logs_msg = ("'''There were no logs for this project from December 21, " @@ -589,13 +583,14 @@ def test_upload_log_page_for_project_no_logs(self, patched_datetime, call = patched_api.save_page.call_args[0] self.assertEqual(header + no_logs_msg, call[1]) + @patch('wp1.logs.redis_connect') @patch('wp1.logs.wiki_connect') @patch('wp1.logs.wp10_connect') @patch('wp1.logs.api') @patch('wp1.logs.generate_log_edits') def test_upload_log_page_for_project_huge_text(self, patched_generate, patched_api, patched_wp10, - patched_wiki): + patched_wiki, patched_redis): project_name = b'Catholicism' header = '{{Log}}\n' text = 'a' * 1000 * 1024 @@ -604,13 +599,15 @@ def test_upload_log_page_for_project_huge_text(self, patched_generate, call = patched_api.save_page.call_args[0] self.assertEqual('%s%s\n%s' % (header, text, text), call[1]) + @patch('wp1.logs.redis_connect') @patch('wp1.logs.wiki_connect') @patch('wp1.logs.wp10_connect') @patch('wp1.logs.api') @patch('wp1.logs.generate_log_edits') def test_upload_log_page_for_project_huge_give_up(self, patched_generate, patched_api, patched_wp10, - patched_wiki): + patched_wiki, + patched_redis): project_name = b'Catholicism' sorry_msg = ('Sorry, all of the logs for this date were too large to ' 'upload.') diff --git a/wp1/models/wp10/log_test.py b/wp1/models/wp10/log_test.py index f425ace29..7a241a089 100644 --- a/wp1/models/wp10/log_test.py +++ b/wp1/models/wp10/log_test.py @@ -20,7 +20,7 @@ def setUp(self): l_old=b'NotA-Class', l_new=b'Mid-Class', l_revision_timestamp=b'2018-01-01T12:00:00Z') - logic_log.insert_or_update(self.wp10db, self.log) + logic_log.insert_or_update(self.redis, self.log) def test_timestamp_dt(self): dt = self.log.timestamp_dt diff --git a/wp1/queues_test.py b/wp1/queues_test.py index aa843fe5f..cf9f86af3 100644 --- a/wp1/queues_test.py +++ b/wp1/queues_test.py @@ -14,11 +14,9 @@ class QueuesTest(BaseWpOneDbTest): def setUp(self): super().setUp() - self.redis = fakeredis.FakeStrictRedis() def tearDown(self): super().tearDown() - self.redis = None @patch('wp1.queues.ENV', Environment.DEVELOPMENT) @patch('wp1.queues.logic_project.update_project_by_name') diff --git a/wp1/redis_db.py b/wp1/redis_db.py index 945244967..e2a297f62 100644 --- a/wp1/redis_db.py +++ b/wp1/redis_db.py @@ -16,3 +16,9 @@ def connect(): creds = CREDENTIALS[ENV]['REDIS'] return Redis(**creds) + + +def gen_redis_log_key(*, project: str | bytes, namespace: str | bytes, + action: str | bytes, article: str | bytes) -> str: + to_str = lambda x: x.decode("utf-8") if isinstance(x, bytes) else x + return f"wp1:logs:{to_str(project)}:{to_str(namespace)}:{to_str(action)}:{to_str(article)}"