diff --git a/week2/week2-NER.ipynb b/week2/week2-NER.ipynb index 10772669..a41da283 100644 --- a/week2/week2-NER.ipynb +++ b/week2/week2-NER.ipynb @@ -80,26 +80,32 @@ " \n", " tweet_tokens = []\n", " tweet_tags = []\n", - " for line in open(file_path, encoding='utf-8'):\n", - " line = line.strip()\n", - " if not line:\n", + " with open(file_path, encoding='utf-8') as fin:\n", + " for line in fin:\n", + " line = line.strip()\n", + " if not line:\n", + " if tweet_tokens:\n", + " tokens.append(tweet_tokens)\n", + " tags.append(tweet_tags)\n", + " tweet_tokens = []\n", + " tweet_tags = []\n", + " else:\n", + " token, tag = line.split()\n", + " # Replace all urls with token\n", + " # Replace all users with token\n", + "\n", + " ######################################\n", + " ######### YOUR CODE HERE #############\n", + " ######################################\n", + "\n", + " tweet_tokens.append(token)\n", + " tweet_tags.append(tag)\n", + " else:\n", " if tweet_tokens:\n", " tokens.append(tweet_tokens)\n", " tags.append(tweet_tags)\n", " tweet_tokens = []\n", - " tweet_tags = []\n", - " else:\n", - " token, tag = line.split()\n", - " # Replace all urls with token\n", - " # Replace all users with token\n", - "\n", - " ######################################\n", - " ######### YOUR CODE HERE #############\n", - " ######################################\n", - " \n", - " tweet_tokens.append(token)\n", - " tweet_tags.append(tag)\n", - " \n", + " tweet_tags = [] \n", " return tokens, tags" ] }, @@ -928,7 +934,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3" + "version": "3.7.2" } }, "nbformat": 4,