Skip to content

Commit 47423e1

Browse files
committed
Updates from March
1 parent 2cff719 commit 47423e1

7 files changed

+103
-56
lines changed

LICENSE

100644100755
File mode changed.

README.md

100644100755
File mode changed.

labs/01_NLP basics.ipynb

100644100755
+62-17
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 25,
66
"metadata": {
77
"cell_id": "00001-80603ce7-5e43-4aaa-9c2e-fc333c20a663",
88
"executionInfo": {
@@ -31,7 +31,7 @@
3131
},
3232
{
3333
"cell_type": "code",
34-
"execution_count": 2,
34+
"execution_count": 26,
3535
"metadata": {
3636
"cell_id": "00002-c68972ca-99d0-4fb8-8b2c-1c296e224a4a",
3737
"executionInfo": {
@@ -63,6 +63,37 @@
6363
"text = raw_html.decode(\"utf-8-sig\")"
6464
]
6565
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": 27,
69+
"metadata": {},
70+
"outputs": [
71+
{
72+
"data": {
73+
"text/plain": [
74+
"'<!DOCTYPE html>\\n<html class=\"client-nojs\" lang=\"en\" dir=\"ltr\">\\n<head>\\n <meta charset=\"UTF-8\"/>\\n\\n<tit'"
75+
]
76+
},
77+
"execution_count": 27,
78+
"metadata": {},
79+
"output_type": "execute_result"
80+
}
81+
],
82+
"source": [
83+
"text[:100]"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": 28,
89+
"metadata": {},
90+
"outputs": [],
91+
"source": [
92+
"# Display the HTML content: \n",
93+
"from IPython.display import HTML\n",
94+
"#HTML(text)"
95+
]
96+
},
6697
{
6798
"cell_type": "markdown",
6899
"metadata": {
@@ -78,7 +109,7 @@
78109
},
79110
{
80111
"cell_type": "code",
81-
"execution_count": 3,
112+
"execution_count": 29,
82113
"metadata": {
83114
"cell_id": "00006-381941ee-0649-4a76-9841-1923426edffe",
84115
"executionInfo": {
@@ -125,7 +156,7 @@
125156
},
126157
{
127158
"cell_type": "code",
128-
"execution_count": 4,
159+
"execution_count": 18,
129160
"metadata": {
130161
"cell_id": "00008-c92e3a1e-acf6-4466-8344-0ca3510a6b75",
131162
"colab": {
@@ -168,7 +199,7 @@
168199
},
169200
{
170201
"cell_type": "code",
171-
"execution_count": 5,
202+
"execution_count": 19,
172203
"metadata": {
173204
"cell_id": "00009-e513d93c-17df-4f64-ba5e-748f1628283d",
174205
"executionInfo": {
@@ -211,7 +242,7 @@
211242
},
212243
{
213244
"cell_type": "code",
214-
"execution_count": 6,
245+
"execution_count": 20,
215246
"metadata": {
216247
"cell_id": "00011-cdf4882f-23c8-4056-852c-7aa7f2e398d9",
217248
"colab": {
@@ -243,7 +274,7 @@
243274
"text": [
244275
"[nltk_data] Downloading package stopwords to\n",
245276
"[nltk_data] /home/yashroff/nltk_data...\n",
246-
"[nltk_data] Package stopwords is already up-to-date!\n"
277+
"[nltk_data] Unzipping corpora/stopwords.zip.\n"
247278
]
248279
},
249280
{
@@ -252,7 +283,7 @@
252283
"['fault', 'find']"
253284
]
254285
},
255-
"execution_count": 6,
286+
"execution_count": 20,
256287
"metadata": {},
257288
"output_type": "execute_result"
258289
}
@@ -285,7 +316,7 @@
285316
},
286317
{
287318
"cell_type": "code",
288-
"execution_count": 7,
319+
"execution_count": 21,
289320
"metadata": {
290321
"cell_id": "00013-fdd3703a-21ab-4f5c-bef8-ecccb46953e3",
291322
"colab": {
@@ -317,7 +348,7 @@
317348
"FreqDist({'could': 395, 'upon': 393, 'would': 370, 'great': 298, 'one': 288, 'two': 252, 'time': 240, 'countri': 231, 'made': 228, 'much': 212, ...})"
318349
]
319350
},
320-
"execution_count": 7,
351+
"execution_count": 21,
321352
"metadata": {},
322353
"output_type": "execute_result"
323354
}
@@ -342,7 +373,7 @@
342373
},
343374
{
344375
"cell_type": "code",
345-
"execution_count": 8,
376+
"execution_count": 22,
346377
"metadata": {
347378
"cell_id": "00015-02387729-4804-4ce6-9161-419c503be606",
348379
"colab": {
@@ -383,7 +414,7 @@
383414
" ('much', 212)]"
384415
]
385416
},
386-
"execution_count": 8,
417+
"execution_count": 22,
387418
"metadata": {},
388419
"output_type": "execute_result"
389420
}
@@ -397,7 +428,7 @@
397428
},
398429
{
399430
"cell_type": "code",
400-
"execution_count": 9,
431+
"execution_count": 23,
401432
"metadata": {
402433
"cell_id": "00016-a40281d3-efa8-460f-bbe0-e4090eee870a",
403434
"colab": {
@@ -429,7 +460,7 @@
429460
"(10, 191)"
430461
]
431462
},
432-
"execution_count": 9,
463+
"execution_count": 23,
433464
"metadata": {},
434465
"output_type": "execute_result"
435466
}
@@ -444,7 +475,7 @@
444475
},
445476
{
446477
"cell_type": "code",
447-
"execution_count": 10,
478+
"execution_count": 24,
448479
"metadata": {
449480
"cell_id": "00017-36e9c930-b190-4fa9-9b2d-dbda227ca607",
450481
"colab": {
@@ -510,7 +541,7 @@
510541
" 'wind']"
511542
]
512543
},
513-
"execution_count": 10,
544+
"execution_count": 24,
514545
"metadata": {},
515546
"output_type": "execute_result"
516547
}
@@ -531,6 +562,20 @@
531562
"word_count_vector.toarray()\n",
532563
"count_vectorizer.get_feature_names()"
533564
]
565+
},
566+
{
567+
"cell_type": "code",
568+
"execution_count": null,
569+
"metadata": {},
570+
"outputs": [],
571+
"source": []
572+
},
573+
{
574+
"cell_type": "code",
575+
"execution_count": null,
576+
"metadata": {},
577+
"outputs": [],
578+
"source": []
534579
}
535580
],
536581
"metadata": {
@@ -555,7 +600,7 @@
555600
"name": "python",
556601
"nbconvert_exporter": "python",
557602
"pygments_lexer": "ipython3",
558-
"version": "3.8.5"
603+
"version": "3.8.2"
559604
}
560605
},
561606
"nbformat": 4,

labs/02_inefficient.ipynb

100644100755
+27-27
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": 1,
15+
"execution_count": 3,
1616
"metadata": {
1717
"executionInfo": {
1818
"elapsed": 546,
@@ -32,28 +32,21 @@
3232
"source_hash": "a45df8bc",
3333
"tags": []
3434
},
35-
"outputs": [
36-
{
37-
"name": "stderr",
38-
"output_type": "stream",
39-
"text": [
40-
"UsageError: Line magic function `%` not found.\n"
41-
]
42-
}
43-
],
35+
"outputs": [],
4436
"source": [
37+
"%matplotlib inline\n",
38+
"\n",
4539
"import string\n",
4640
"from collections import Counter\n",
4741
"from pprint import pprint\n",
4842
"import gzip\n",
4943
"import matplotlib.pyplot as plt\n",
50-
"import numpy as np \n",
51-
"% matplotlib inline"
44+
"import numpy as np \n"
5245
]
5346
},
5447
{
5548
"cell_type": "code",
56-
"execution_count": 2,
49+
"execution_count": 4,
5750
"metadata": {
5851
"executionInfo": {
5952
"elapsed": 543,
@@ -92,7 +85,7 @@
9285
},
9386
{
9487
"cell_type": "code",
95-
"execution_count": 3,
88+
"execution_count": 5,
9689
"metadata": {
9790
"executionInfo": {
9891
"elapsed": 510,
@@ -135,7 +128,7 @@
135128
},
136129
{
137130
"cell_type": "code",
138-
"execution_count": 4,
131+
"execution_count": 6,
139132
"metadata": {
140133
"colab": {
141134
"base_uri": "https://localhost:8080/"
@@ -185,7 +178,7 @@
185178
},
186179
{
187180
"cell_type": "code",
188-
"execution_count": 5,
181+
"execution_count": 7,
189182
"metadata": {
190183
"executionInfo": {
191184
"elapsed": 681,
@@ -225,7 +218,7 @@
225218
},
226219
{
227220
"cell_type": "code",
228-
"execution_count": 6,
221+
"execution_count": 8,
229222
"metadata": {
230223
"colab": {
231224
"base_uri": "https://localhost:8080/"
@@ -289,7 +282,7 @@
289282
},
290283
{
291284
"cell_type": "code",
292-
"execution_count": 7,
285+
"execution_count": 9,
293286
"metadata": {
294287
"colab": {
295288
"base_uri": "https://localhost:8080/"
@@ -344,7 +337,7 @@
344337
},
345338
{
346339
"cell_type": "code",
347-
"execution_count": 8,
340+
"execution_count": 10,
348341
"metadata": {
349342
"executionInfo": {
350343
"elapsed": 538,
@@ -379,7 +372,7 @@
379372
},
380373
{
381374
"cell_type": "code",
382-
"execution_count": 9,
375+
"execution_count": 11,
383376
"metadata": {
384377
"executionInfo": {
385378
"elapsed": 527,
@@ -407,7 +400,7 @@
407400
},
408401
{
409402
"cell_type": "code",
410-
"execution_count": 10,
403+
"execution_count": 12,
411404
"metadata": {
412405
"executionInfo": {
413406
"elapsed": 667,
@@ -439,7 +432,7 @@
439432
},
440433
{
441434
"cell_type": "code",
442-
"execution_count": 11,
435+
"execution_count": 13,
443436
"metadata": {
444437
"executionInfo": {
445438
"elapsed": 844,
@@ -475,7 +468,7 @@
475468
},
476469
{
477470
"cell_type": "code",
478-
"execution_count": 12,
471+
"execution_count": 14,
479472
"metadata": {
480473
"colab": {
481474
"base_uri": "https://localhost:8080/"
@@ -515,7 +508,7 @@
515508
},
516509
{
517510
"cell_type": "code",
518-
"execution_count": 13,
511+
"execution_count": 15,
519512
"metadata": {
520513
"colab": {
521514
"base_uri": "https://localhost:8080/"
@@ -555,7 +548,7 @@
555548
},
556549
{
557550
"cell_type": "code",
558-
"execution_count": 14,
551+
"execution_count": 16,
559552
"metadata": {
560553
"colab": {
561554
"base_uri": "https://localhost:8080/"
@@ -586,14 +579,21 @@
586579
"True"
587580
]
588581
},
589-
"execution_count": 14,
582+
"execution_count": 16,
590583
"metadata": {},
591584
"output_type": "execute_result"
592585
}
593586
],
594587
"source": [
595588
"mary_hot[6] == 1"
596589
]
590+
},
591+
{
592+
"cell_type": "code",
593+
"execution_count": null,
594+
"metadata": {},
595+
"outputs": [],
596+
"source": []
597597
}
598598
],
599599
"metadata": {
@@ -618,7 +618,7 @@
618618
"name": "python",
619619
"nbconvert_exporter": "python",
620620
"pygments_lexer": "ipython3",
621-
"version": "3.8.5"
621+
"version": "3.8.2"
622622
}
623623
},
624624
"nbformat": 4,

0 commit comments

Comments
 (0)