
Commit b52f7a0

update development environment
1 parent 10f8602 commit b52f7a0

File tree

13 files changed (+115, -47 lines)


.gitignore

Lines changed: 4 additions & 1 deletion
```diff
@@ -75,5 +75,8 @@ target/
 # Jupyter NB Checkpoints
 .ipynb_checkpoints/
 
+# bash history
+.bash_history
+
 # exclude data from source control by default
-/data/
+# /data/
```

.test_environment.py.swp

1 KB (binary file not shown)

Dockerfile

Lines changed: 42 additions & 10 deletions
```diff
@@ -1,18 +1,50 @@
-# Base image
-FROM python:3.6
 
-# Updating repository sources
-RUN apt-get update
+# Adapted from https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5
+FROM ubuntu:16.04
+
+# Add metadata to the image as a key-value pair
+LABEL maintainer="Simon Kassel <[email protected]>"
+
+RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \
+    build-essential \
+    byobu \
+    curl \
+    git-core \
+    htop \
+    pkg-config \
+    python3-dev \
+    python-pip \
+    python-setuptools \
+    python-virtualenv \
+    unzip \
+    nano \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
+    wget --quiet https://repo.continuum.io/archive/Anaconda3-5.0.0.1-Linux-x86_64.sh -O ~/anaconda.sh && \
+    /bin/bash ~/anaconda.sh -b -p /opt/conda && \
+    rm ~/anaconda.sh
+
+ENV PATH /opt/conda/bin:$PATH
 
 # Copying requirements.txt file
 COPY requirements.txt requirements.txt
 
 # pip install
-RUN pip install --no-cache -r requirements.txt
+RUN pip install --no-cache -r requirements.txt && \
+    rm requirements.txt
+
+# Open port for Jupyter
+EXPOSE 8000
 
-# Exposing ports
-EXPOSE 8888
+# Set up the file system
+RUN mkdir /project
+ENV HOME=/project
+ENV SHELL=/bin/bash
+VOLUME /project
+WORKDIR /project
 
-# Running jupyter notebook
-# --NotebookApp.token ='demo' is the password
-# CMD ["jupyter", "notebook", "--no-browser", "--ip=0.0.0.0", "--allow-root", "--NotebookApp.token='demo'"]
+# Run a bash shell by default
+CMD ["/bin/bash"]
```
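For context, a minimal sketch of how this image might be built and run by hand. The `da-project-template` tag comes from the README below, and the flags mirror the `EXPOSE 8000` and `VOLUME /project` lines above; the exact invocation used by the project's `scripts/container.sh` is not shown in this commit:

```bash
# Build the image from the repository root (one-time step)
docker build -t da-project-template .

# Open an interactive shell in a throwaway container, mounting the
# current directory at /project and publishing the Jupyter port
docker run -it --rm \
    -v "$(pwd)":/project \
    -p 8000:8000 \
    da-project-template
```

From that shell, Jupyter would need to be started with `jupyter notebook --ip=0.0.0.0 --port=8000 --allow-root` to be reachable through the published port.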

README.md

Lines changed: 49 additions & 32 deletions
```diff
@@ -1,54 +1,71 @@
-Azavea data analytics team python project template
+Azavea Data Analytics team Python project template
 ==============================
 
-template for Azavea Data Analytics team Python projects
+A file structure template, development environment, and rule set for Python data analytics projects on the Data Analytics team
+
+Getting Started
+------------
+From within the root directory, first remove git tracking from the project:
+
+`rm -rf .git`
+
+If you have not already done so, build the Docker image (you will only need to do this once):
+
+`docker build -t da-project-template .`
+
+Run a Docker container:
+
+`./scripts/container.sh .`
+
+This will open a bash shell within the Docker container. Inside the container, the project directory on the host machine (specified as a parameter to `container.sh` above) maps to `/project`. You can now access the full file structure of this template from within the container.
+
+To exit:
+
+`exit`
 
 Project Organization
 ------------
 
-├── LICENSE
-├── Makefile           <- Makefile with commands like `make data` or `make train`
 ├── README.md          <- The top-level README for developers using this project.
 ├── data
-│   ├── external       <- Data from third party sources.
-│   ├── interim        <- Intermediate data that has been transformed.
-│   ├── processed      <- The final, canonical data sets for modeling.
-│   └── raw            <- The original, immutable data dump.
+│   ├── interim        <- Intermediate data that has been transformed
+│   ├── organized      <- Raw datasets that have been renamed or reorganized into a new folder structure but whose contents are unchanged
+│   ├── processed      <- The final, canonical data sets for modeling
+│   └── raw            <- The original, immutable data dump
+
+├── docs               <- A default Sphinx project; see sphinx-doc.org for details (currently not configured)
 
-├── docs               <- A default Sphinx project; see sphinx-doc.org for details
+├── guide              <- A set of markdown files documenting best practices, guidelines, and rules for collaborative projects
 
 ├── models             <- Trained and serialized models, model predictions, or model summaries
 
 ├── notebooks          <- Jupyter notebooks. Naming convention is a number (for ordering),
-│                         the creator's initials, and a short `-` delimited description, e.g.
-│                         `1.0-jqp-initial-data-exploration`.
+│                         the creator's initials, and a short `-` delimited description, e.g.
+│                         `1.0-jqp-initial-data-exploration`
 
 ├── references         <- Data dictionaries, manuals, and all other explanatory materials.
 
 ├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
 │   └── figures        <- Generated graphics and figures to be used in reporting
 
-├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
-│                         generated with `pip freeze > requirements.txt`
-
-├── src                <- Source code for use in this project.
-│   ├── __init__.py    <- Makes src a Python module
-│   │
-│   ├── data           <- Scripts to download or generate data
-│   │   └── make_dataset.py
-│   │
-│   ├── features       <- Scripts to turn raw data into features for modeling
-│   │   └── build_features.py
-│   │
-│   ├── models         <- Scripts to train models and then use trained models to make
-│   │   │                 predictions
-│   │   ├── predict_model.py
-│   │   └── train_model.py
-│   │
-│   └── visualization  <- Scripts to create exploratory and results oriented visualizations
-│       └── visualize.py
-
-└── tox.ini            <- tox file with settings for running tox; see tox.testrun.org
+├── requirements.txt   <- The requirements file for reproducing the analysis environment
+
+└── src                <- Source code for use in this project.
+
+    ├── data           <- Scripts to download or generate data
+    │   └── make_dataset.py
+
+    ├── features       <- Scripts to turn raw data into features for modeling
+    │   └── build_features.py
+
+    ├── models         <- Scripts to train models and then use trained models to make
+    │   │                 predictions
+    │   ├── predict_model.py
+    │   └── train_model.py
+
+    └── visualization  <- Scripts to create exploratory and results oriented visualizations
+        └── visualize.py
+
 
 
 --------
```
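The `scripts/container.sh` helper referenced in the Getting Started section above is not included in this diff. A hypothetical sketch of what it might contain, assuming it takes the host project directory as its single argument and mounts it at `/project`:

```bash
#!/bin/bash
# Hypothetical reconstruction of scripts/container.sh (not in this diff).
# Usage: ./scripts/container.sh <path-to-project-directory>
set -e

# Resolve the directory argument to an absolute path for the bind mount
PROJECT_DIR=$(cd "${1:-.}" && pwd)

# Start an interactive container with the project mounted at /project;
# the image tag is assumed from the README's build step
docker run -it --rm \
    -v "$PROJECT_DIR":/project \
    -p 8000:8000 \
    da-project-template
```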

data/.gitignore

Lines changed: 4 additions & 0 deletions
```diff
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
```
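This is the standard whitelist pattern for keeping an otherwise-ignored directory under version control: ignore everything, then re-include the `.gitignore` itself. A quick way to confirm the rules behave as intended (the data file path here is illustrative):

```bash
# -v prints the matching .gitignore rule; exit status 0 means "ignored"
git check-ignore -v data/raw/example.csv

# The .gitignore itself should NOT be ignored (non-zero exit status)
git check-ignore -v data/.gitignore || echo "data/.gitignore is tracked"
```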
File renamed without changes.
File renamed without changes.

docs/README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -0,0 +1 @@
+# [Work in Progress]
```

guide/README.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -0,0 +1,3 @@
+# Rules & Best Practices
+
+Markdown documentation of rules, guidelines, and best practices for working on collaborative data analysis projects on the Data Analytics team
```

requirements.txt

Lines changed: 3 additions & 1 deletion
```diff
@@ -55,4 +55,6 @@ traitlets==4.3.2
 wcwidth==0.1.7
 Werkzeug==0.12.2
 widgetsnbextension==3.0.6
-jupyterlab==0.31.9
+jupyterlab==0.31.9
+geopandas==0.3.0
+descartes==1.1.0
```
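After rebuilding the image, one way to confirm the new geospatial packages installed cleanly might be a quick import check inside a container (image tag assumed from the README):

```bash
docker run --rm da-project-template \
    python -c "import geopandas, descartes; print(geopandas.__version__)"
```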
