4040import csv
4141import json
4242import os
43+ import re
4344import time
45+ from email .utils import parseaddr
4446from pprint import pprint
4547
4648import click
@@ -146,6 +148,11 @@ def get_plan_uuids_by_name(plans_by_name_file):
146148 return plans_by_name
147149
148150
def is_valid_email(email):
    """
    Return True if ``email`` looks like a deliverable address.

    The value is parsed with ``email.utils.parseaddr`` (so a
    "Display Name <user@host>" form is accepted), and the parsed address
    must contain an ``@`` with a non-empty local part before it and a
    non-empty domain after it.

    Non-string input (e.g. ``None``) and empty strings are reported as
    invalid rather than raising.
    """
    if not isinstance(email, str):
        return False

    _, address = parseaddr(email)

    # parseaddr() hands back any bare token as the "address"
    # (e.g. "foo" -> ('', 'foo')), so a truthiness check alone would accept
    # strings that have no "@" at all. Require both sides of the last "@".
    local_part, separator, domain = address.rpartition('@')
    return bool(local_part and separator and domain)
154+
155+
149156def get_email_chunks (input_file_path , plans_by_name , chunk_size = DEFAULT_CHUNK_SIZE ):
150157 """
151158 Yield chunks of (chunk_id, subscription_plan, email) from the given input file.
@@ -159,14 +166,21 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
159166 current_chunk = []
160167 chunk_id = 0
161168 current_subscription_plan_uuid = None
162- with open (input_file_path , 'r' ) as f_in :
169+ # CSVs can contain non-ASCII characters. latin-1 is the encoding
170+ # that currently works with our production input; it could
171+ # eventually be parameterized as an input to this command.
172+ with open (input_file_path , 'r' , encoding = 'latin-1' ) as f_in :
163173 reader = csv .DictReader (f_in , fieldnames = INPUT_FIELDNAMES , delimiter = ',' )
164174
165175 # read and skip the header
166176 next (reader )
167177
168178 for row in reader :
169179 email = row ['email' ]
180+ if not is_valid_email (email ):
181+ print ("Invalid email:" , email )
182+ continue
183+
170184 university_name = row ['university_name' ]
171185 subscription_plan_uuid = plans_by_name [university_name ]
172186
@@ -198,7 +212,7 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
198212
199213def _post_assignments (subscription_plan_uuid , emails_for_chunk , environment = 'local' , fetch_jwt = False ):
200214 """
201- Maket the POST request to assign licenses.
215+ Make the POST request to assign licenses.
202216 """
203217 url_pattern = ENVIRONMENTS [environment ]
204218 url = url_pattern .format (subscription_plan_uuid = subscription_plan_uuid )
0 commit comments