-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathextract_openedx_data.py
60 lines (48 loc) · 1.78 KB
/
extract_openedx_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Management command for populating oll course data"""
import json
from pathlib import Path
from django.core.management import BaseCommand
from learning_resources.etl import mit_edx, mit_edx_programs, oll
from learning_resources.etl.constants import ETLSource
from main.utils import now_in_utc
# Maps each supported ETL source name to the zero-argument callable that
# extracts its data. The keys double as the accepted values of the command's
# --etl_source option, so insertion order is the order shown in its choices.
EXTRACTORS = {
    ETLSource.oll.name: oll.extract,
    ETLSource.mit_edx.name: mit_edx.extract,
    f"{ETLSource.mit_edx.name}_programs": mit_edx_programs.extract,
}
def extract_data(etl_source):
    """
    Run the extractor registered for the given ETL source

    Args:
        etl_source: a key of EXTRACTORS naming the source to extract

    Returns:
        Whatever the registered extractor callable returns

    Raises:
        KeyError: if etl_source is not a key of EXTRACTORS
    """
    extractor = EXTRACTORS[etl_source]
    return extractor()
class Command(BaseCommand):
    """Extract data from an ETL source and write it to a JSON file"""

    help = "Extract data from an ETL source and write it to a JSON file"

    def add_arguments(self, parser):
        """Register the required --etl_source and --output options"""
        parser.add_argument(
            "--etl_source",
            dest="etl_source",
            required=True,
            # Only sources that have a registered extractor are accepted
            choices=list(EXTRACTORS),
            help="The ETL source data to extract",
        )
        parser.add_argument(
            "--output",
            dest="outfile",
            required=True,
            help="Path of the file to write the extracted data to, as JSON",
        )
        super().add_arguments(parser)

    def handle(self, *args, **options):  # noqa: ARG002
        """
        Extract data for the requested source and dump it as JSON

        Reads the "etl_source" and "outfile" options, runs the matching
        extractor, reports the elapsed time on stdout, then serializes the
        result to the output file.
        """
        etl_source = options["etl_source"]
        outfile = options["outfile"]

        self.stdout.write(f"Starting to get {etl_source} course data")
        start = now_in_utc()
        data = extract_data(etl_source)
        total_seconds = (now_in_utc() - start).total_seconds()
        self.stdout.write(
            f"Extraction of {etl_source} data finished, took {total_seconds} seconds"
        )

        self.stdout.write(f"Writing data to {outfile}")
        # Explicit encoding so the output does not depend on the platform default
        with Path(outfile).open("w", encoding="utf-8") as f:
            json.dump(data, f)
        self.stdout.write(f"Data written to {outfile}")