Skip to content

Commit 8d9b6f3

Browse files
committed
re-run old notebooks, add current requirements file
1 parent e05b7fb commit 8d9b6f3

File tree

7 files changed

+973
-168
lines changed

7 files changed

+973
-168
lines changed

dgrtwo-translations/2019-02-19-us_phds/us_phds_pandas.ipynb

Lines changed: 596 additions & 31 deletions
Large diffs are not rendered by default.

dgrtwo-translations/2019-02-19-us_phds/us_phds_siuba-tabbed.ipynb

Lines changed: 124 additions & 74 deletions
Large diffs are not rendered by default.

dgrtwo-translations/2019-02-19-us_phds/us_phds_siuba.ipynb

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Tidy Tuesday in Python: 2019-02-19 - US PhDs\n",
8+
"\n",
9+
"This analysis is a Python translation of Dave Robinson's R [code](https://github.com/dgrtwo/data-screencasts/blob/master/us_phds.Rmd) and corresponding [screencast](https://youtu.be/KzRP40PzopY).\n",
10+
"\n",
11+
"It compares the code for these Python approaches side-by-side:\n",
12+
"\n",
13+
"* [siuba](http://github.com/machow/siuba) \n",
14+
"* [pandas](https://github.com/pandas-dev/pandas)"
15+
]
16+
},
317
{
418
"cell_type": "code",
519
"execution_count": 1,
@@ -187,21 +201,16 @@
187201
}
188202
],
189203
"source": [
190-
"\n",
191-
"renamed = (\n",
192-
" major_fields_raw\n",
193-
" >> rename(field=\"Field of study\")\n",
194-
" # >> gather(\"key\", \"value\", -_.field)\n",
195-
")\n",
204+
"renamed = rename(major_fields_raw, field = \"Field of study\")\n",
196205
"\n",
197206
"melted_majors = renamed.melt(id_vars = \"field\", var_name = \"key\")\n",
198207
"\n",
199208
"major_fields = (\n",
200209
" melted_majors\n",
201210
" >> mutate(\n",
202-
" year=if_else(_.key.str.contains(\"Unnamed: \").fillna(False), np.nan, _.key),\n",
211+
" year=_.key.replace(\"Unnamed\", np.nan, regex = True),\n",
203212
" type=_.value.where(_.value.str.contains(\"Number|Percent\"), np.nan),\n",
204-
" value=_.value.apply(pd.to_numeric, args=(\"coerce\",)),\n",
213+
" value=lambda d: pd.to_numeric(d.value, \"coerce\")\n",
205214
" )\n",
206215
" >> mutate(year=_.year.ffill(), type=_.type.ffill())\n",
207216
" >> select(-_.key)\n",
@@ -221,7 +230,7 @@
221230
},
222231
{
223232
"cell_type": "code",
224-
"execution_count": 4,
233+
"execution_count": 19,
225234
"metadata": {
226235
"pytabs": {
227236
"class": "pytabs-4",
@@ -344,15 +353,15 @@
344353
"[3543 rows x 3 columns]"
345354
]
346355
},
347-
"execution_count": 4,
356+
"execution_count": 19,
348357
"metadata": {},
349358
"output_type": "execute_result"
350359
}
351360
],
352361
"source": [
353362
"fine_fields = (\n",
354363
" pd.read_excel(\"./data/sed17-sr-tab013.xlsx\", skiprows=3)\n",
355-
" >> rename(field = _[\"Fine field of study\"])\n",
364+
" >> rename(field = \"Fine field of study\")\n",
356365
" >> gather(\"year\", \"number\", -_.field)\n",
357366
" >> filter(_.number != 'na')\n",
358367
" >> mutate(\n",
@@ -478,13 +487,10 @@
478487
" >> rename(field=\"Unnamed: 0\")\n",
479488
" >> filter(~_.field.isin(sex))\n",
480489
" >> mutate(\n",
481-
" field=case_when(\n",
482-
" {\n",
483-
" _.field == \"Life sciencesb\": \"Life sciences\",\n",
484-
" _.field == \"Otherc\": \"Other\",\n",
485-
" True: _.field,\n",
486-
" }\n",
487-
" )\n",
490+
"            field=_.field.replace({  # map values; Series.rename would relabel the index, not the data\n",
491+
" \"Life sciencesb\": \"Life sciences\",\n",
492+
" \"Otherc\": \"Other\"\n",
493+
" })\n",
488494
" )\n",
489495
")\n",
490496
"\n",
@@ -670,27 +676,16 @@
670676
" >> rename(field=\"Sex and major field of study\")\n",
671677
" >> select(-_.contains(\"change\"))\n",
672678
" >> mutate(\n",
673-
" field=case_when(\n",
674-
" {\n",
675-
" _.field == \"All doctorate recipientsa\": \"All\",\n",
676-
" _.field == \"Otherb\": \"Other\",\n",
677-
" True: _.field,\n",
678-
" }\n",
679-
" ),\n",
680-
" sex=_.field.map({\"Male\": \"Male\", \"Female\": \"Female\", \"All\": \"All\"}),\n",
681-
" broad_field=\n",
682-
" if_else(_.field.isin(broad_fields), _.field, np.nan)\n",
683-
" )\n",
684-
"\n",
685-
" >> mutate(\n",
686-
" broad_field = _.broad_field.apply(lambda x: x if x != 'nan' else np.nan).ffill(),\n",
687-
" sex = _.sex.ffill()\n",
679+
" field=_.field.replace({\n",
680+
" \"All doctorate recipientsa\": \"All\",\n",
681+
" \"Otherb\": \"Other\"\n",
682+
" }),\n",
683+
" sex=_.field.where(_.field.isin([\"Male\", \"Female\", \"All\"])),\n",
684+
" broad_field= _.field.where(_.field.isin(broad_fields))\n",
688685
" )\n",
686+
" >> mutate(sex = _.sex.ffill(), broad_field = _.broad_field.ffill())\n",
689687
" >> gather(\"year\", \"number\", -_.sex, -_.broad_field, -_.field)\n",
690-
" >> filter(\n",
691-
" ~_.field.isin(sex),\n",
692-
" _.number.notna()\n",
693-
" )\n",
688+
" >> filter(~_.field.isin(sex), _.number.notna())\n",
694689
" >> mutate(year = _.year.astype(int))\n",
695690
")\n",
696691
"\n",

dgrtwo-translations/2019-03-05-women-workplace/women-workplace-siuba-tabbed.html

Lines changed: 30 additions & 11 deletions
Large diffs are not rendered by default.

dgrtwo-translations/2019-03-05-women-workplace/women-workplace-siuba-tabbed.ipynb

Lines changed: 29 additions & 14 deletions
Large diffs are not rendered by default.

dgrtwo-translations/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ templates:
3434

3535
2019-03-12-board-games/tabbed-analysis.html: 2019-03-12-board-games
3636
cd "$<" && nbtabs convert -e --out tabbed-analysis.ipynb board-games-siuba.Rmd board-games.Rmd
37-
jupyter nbconvert --to html --template templates/nbtabs-full.tpl "$</tabbed-analysis.ipynb"
37+
# nbconvert input must be the notebook written by `nbtabs convert --out` above
jupyter nbconvert --to html --template templates/nbtabs-full.tpl "$</tabbed-analysis.ipynb"
3838

3939

4040

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
aiohttp==3.6.2
2+
altair==4.0.1
3+
appdirs==1.4.3
4+
appnope==0.1.0
5+
argh==0.26.2
6+
astor==0.8.1
7+
async-generator==1.10
8+
async-timeout==3.0.1
9+
attrs==19.3.0
10+
backcall==0.1.0
11+
base58==2.0.0
12+
black==19.10b0
13+
bleach==3.1.0
14+
blinker==1.4
15+
bokeh==1.4.0
16+
boto3==1.12.6
17+
botocore==1.15.6
18+
certifi==2019.11.28
19+
cffi==1.13.2
20+
chardet==3.0.4
21+
Click==7.0
22+
colorama==0.4.3
23+
cycler==0.10.0
24+
dash==1.9.0
25+
dash-core-components==1.8.0
26+
dash-html-components==1.0.2
27+
dash-renderer==1.2.4
28+
dash-table==4.6.0
29+
decorator==4.4.1
30+
defusedxml==0.6.0
31+
descartes==1.1.0
32+
-e git+https://github.com/django-extensions/django-extensions.git@0ad61ae76b121344ccd2ad4bbf4af4e4b5b8f70e#egg=django_extensions
33+
docutils==0.15.2
34+
entrypoints==0.3
35+
enum-compat==0.0.3
36+
Flask==1.1.1
37+
Flask-Compress==1.4.0
38+
future==0.18.2
39+
gitdb2==2.0.6
40+
GitPython==3.0.5
41+
idna==2.8
42+
idna-ssl==1.1.0
43+
importlib-metadata==1.4.0
44+
ipykernel==5.1.3
45+
ipython==7.11.1
46+
ipython-genutils==0.2.0
47+
ipywidgets==7.5.1
48+
itsdangerous==1.1.0
49+
jedi==0.15.2
50+
Jinja2==2.10.3
51+
jmespath==0.9.5
52+
joblib==0.14.1
53+
json5==0.8.5
54+
jsonschema==3.2.0
55+
jupyter-client==5.3.4
56+
jupyter-contrib-core==0.3.3
57+
jupyter-contrib-nbextensions==0.5.1
58+
jupyter-core==4.6.1
59+
jupyter-highlight-selected-word==0.2.0
60+
jupyter-latex-envs==1.4.6
61+
jupyter-nbextensions-configurator==0.4.1
62+
jupyter-server==0.1.1
63+
jupyter-server-proxy==1.2.0
64+
jupyterlab==1.2.6
65+
jupyterlab-code-formatter==1.0.3
66+
jupyterlab-dash==0.1.0a3
67+
jupyterlab-git==0.9.0
68+
jupyterlab-pygments==0.1.0
69+
jupyterlab-quickopen==0.3.0
70+
jupyterlab-server==1.0.6
71+
jupytext==1.3.2
72+
keyring==21.1.0
73+
kiwisolver==1.1.0
74+
lxml==4.5.0
75+
Markdown==3.1.1
76+
MarkupSafe==1.1.1
77+
matplotlib==3.1.2
78+
mistletoe==0.7.2
79+
mistune==0.8.4
80+
mizani==0.6.0
81+
more-itertools==8.1.0
82+
multidict==4.7.5
83+
multipy==0.16
84+
nbconvert==5.6.1
85+
nbdime==1.1.0
86+
nbformat==5.0.4
87+
-e git@github.com:machow/nbtabs.git@61199b91809a8f502ada774be67fc51e337d7d98#egg=nbtabs
88+
notebook==6.0.3
89+
numpy==1.18.1
90+
packaging==19.2
91+
palettable==3.3.0
92+
pandas==1.0.1
93+
pandocfilters==1.4.2
94+
parso==0.5.2
95+
pathspec==0.7.0
96+
pathtools==0.1.2
97+
patsy==0.5.1
98+
pexpect==4.8.0
99+
pickleshare==0.7.5
100+
Pillow==7.0.0
101+
pkginfo==1.5.0.1
102+
plotly==4.5.1
103+
plotnine==0.6.0
104+
pluggy==0.13.1
105+
prometheus-client==0.7.1
106+
prompt-toolkit==3.0.2
107+
protobuf==3.11.3
108+
psycopg2==2.8.4
109+
ptyprocess==0.6.0
110+
py==1.8.1
111+
pycparser==2.19
112+
pydeck==0.2.1
113+
Pygments==2.5.2
114+
pyparsing==2.4.6
115+
pyrsistent==0.15.7
116+
pytest==5.3.4
117+
python-dateutil==2.8.0
118+
pytz==2019.3
119+
PyYAML==5.3
120+
pyzmq==18.1.1
121+
qgrid==1.2.0
122+
readme-renderer==24.0
123+
regex==2020.1.8
124+
requests==2.22.0
125+
requests-toolbelt==0.9.1
126+
retrying==1.3.3
127+
rpy2==3.2.4
128+
s3transfer==0.3.3
129+
scikit-learn==0.22.1
130+
scipy==1.4.1
131+
Send2Trash==1.5.0
132+
simpervisor==0.3
133+
simplegeneric==0.8.1
134+
-e git@github.com:machow/siuba.git@42872e5cb4068e6bd1be53d04619ea2ec5bab186#egg=siuba
135+
six==1.14.0
136+
smmap2==2.0.5
137+
SQLAlchemy==1.3.13
138+
statsmodels==0.11.0
139+
streamlit==0.56.0
140+
terminado==0.8.3
141+
testpath==0.4.4
142+
toml==0.10.0
143+
toolz==0.10.0
144+
tornado==5.1.1
145+
tqdm==4.42.1
146+
traitlets==4.3.3
147+
twine==3.1.1
148+
typed-ast==1.4.1
149+
typing-extensions==3.7.4.1
150+
tzlocal==2.0.0
151+
urllib3==1.25.8
152+
validators==0.14.2
153+
voila==0.1.20
154+
watchdog==0.10.2
155+
wcwidth==0.1.8
156+
webencodings==0.5.1
157+
Werkzeug==1.0.0
158+
widgetsnbextension==3.5.1
159+
xlrd==1.2.0
160+
yarl==1.4.2
161+
zipp==2.0.1

0 commit comments

Comments
 (0)