10 changes: 5 additions & 5 deletions .github/stale.yml
@@ -1,11 +1,11 @@
 # Configuration for probot-stale - https://github.com/probot/stale
 
 # Number of days of inactivity before an Issue or Pull Request becomes stale
-daysUntilStale: 365
+daysUntilStale: 10000
 
 # Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
 # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
-daysUntilClose: 730
+daysUntilClose: false
 
 # Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled)
 onlyLabels: []
@@ -25,7 +25,7 @@ exemptMilestones: false
 exemptAssignees: false
 
 # Label to use when marking as stale
-staleLabel: stale
+staleLabel: Needs-Author-Feedback
 
 # Comment to post when marking as stale. Set to `false` to disable
 markComment: >
@@ -34,8 +34,8 @@ markComment: >
   for your contributions.
 
 # Comment to post when removing the stale label.
-unmarkComment: >
-  Thank you for interacting with this issue! Removing the stale label!
+#unmarkComment: >
+#  Thank you for interacting with this issue! Removing the stale label!
 
 # Comment to post when closing a stale Issue or Pull Request.
 closeComment: >
20 changes: 0 additions & 20 deletions .github/workflows/jira.pr.yml

This file was deleted.

27 changes: 27 additions & 0 deletions .github/workflows/master.yml
@@ -0,0 +1,27 @@
name: MLflow tests

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  python-small:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.6
        uses: actions/setup-python@v1
        with:
          python-version: 3.6
      - name: Install dependencies
        run: |
          export GITHUB_WORKFLOW=1
          INSTALL_SMALL_PYTHON_DEPS=true source ./travis/install-common-deps.sh
      - name: Run tests
        run: |
          export GITHUB_WORKFLOW=1
          export PATH="$HOME/miniconda/bin:$PATH"
          source activate test-environment
          ./travis/run-small-python-tests.sh
2 changes: 1 addition & 1 deletion .travis.yml
@@ -84,7 +84,7 @@ matrix:
       - pip install -r travis/small-requirements.txt
       - pip install -e .
     script:
-      - pytest --verbose --ignore=tests/h2o --ignore=tests/keras --ignore=tests/pytorch --ignore=tests/pyfunc --ignore=tests/sagemaker --ignore=tests/sklearn --ignore=tests/spark --ignore=tests/tensorflow --ignore=tests/keras_autolog --ignore=tests/tensorflow_autolog --ignore tests/azureml --ignore tests/onnx --ignore tests/projects --ignore=tests/xgboost --ignore=tests/lightgbm --ignore=tests/spark_autologging tests
+      - pytest --verbose --ignore=tests/h2o --ignore=tests/keras --ignore=tests/pytorch --ignore=tests/pyfunc --ignore=tests/sagemaker --ignore=tests/sklearn --ignore=tests/spark --ignore=tests/tensorflow --ignore=tests/keras_autolog --ignore=tests/tensorflow_autolog --ignore tests/azureml --ignore tests/onnx --ignore tests/projects --ignore=tests/xgboost --ignore=tests/lightgbm --ignore=tests/spark_autologging --ignore=tests/spacy tests
   - language: python
     python: 3.6
     name: "Docs (rsthtml, javadocs)"
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -34,6 +34,7 @@
 # ones.
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
     'sphinx.ext.napoleon',
     'sphinx_click.ext',
 ]
3 changes: 2 additions & 1 deletion docs/source/model-registry.rst
@@ -95,6 +95,7 @@ There are three programmatic ways to add a model to the registry. First, you can
     from sklearn.ensemble import RandomForestRegressor
 
     import mlflow
+    import mlflow.sklearn
 
     with mlflow.start_run(run_name="YOUR_RUN_NAME") as run:
         params = {"n_estimators": 5, "random_state": 42}
@@ -103,7 +104,7 @@ There are three programmatic ways to add a model to the registry. First, you can
         # Log parameters and metrics using the MLflow APIs
         mlflow.log_params(params)
         mlflow.log_param("param_1", randint(0, 100))
-        mlflow.log_metrics({"metric_1": random(), "metric_2", random() + 1})
+        mlflow.log_metrics({"metric_1": random(), "metric_2": random() + 1})
 
         # Log the sklearn model and register as version 1
         mlflow.sklearn.log_model(
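For reference (not part of this diff): the hunk above is folded before the end of the logging call. A minimal self-contained sketch of the registration step it leads into, assuming the `registered_model_name` argument of `mlflow.sklearn.log_model`; the model and names here are illustrative:

```python
from sklearn.ensemble import RandomForestRegressor

import mlflow
import mlflow.sklearn

with mlflow.start_run():
    rf = RandomForestRegressor(n_estimators=5, random_state=42)
    # Passing registered_model_name both stores the artifact under the run
    # and creates version 1 of the named model in the registry.
    mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="sklearn-model",
        registered_model_name="sk-learn-random-forest-reg-model",  # illustrative name
    )
```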
5 changes: 4 additions & 1 deletion docs/source/models.rst
@@ -561,7 +561,10 @@ Example requests:
     }'
 
     # record-oriented (fine for vector rows, loses ordering for JSON records)
-    curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json; format=pandas-records' -d '[[1, 2, 3], [4, 5, 6]]'
+    curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json; format=pandas-records' -d '[
+        {"a": 1,"b": 2,"c": 3},
+        {"a": 4,"b": 5,"c": 6}
+    ]'
 
 
 For more information about serializing pandas DataFrames, see
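For comparison (not part of this diff), a minimal sketch of the same record-oriented request issued from Python rather than curl, assuming a scoring server listening on 127.0.0.1:5000 as in the examples above:

```python
import requests

# Record-oriented payload: one JSON object per row, keyed by column name.
records = [
    {"a": 1, "b": 2, "c": 3},
    {"a": 4, "b": 5, "c": 6},
]
response = requests.post(
    "http://127.0.0.1:5000/invocations",
    json=records,
    # The explicit header overrides the default Content-Type set by json=...
    headers={"Content-Type": "application/json; format=pandas-records"},
)
print(response.text)
```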
13 changes: 13 additions & 0 deletions docs/source/search-syntax.rst
@@ -185,6 +185,19 @@ To search all known experiments for any MLflow runs created using the Inception
     all_experiments = [exp.experiment_id for exp in MlflowClient().list_experiments()]
     runs = MlflowClient().search_runs(experiment_ids=all_experiments, filter_string="params.model = 'Inception'", run_view_type=ViewType.ALL)
 
+R
+^^^^^^
+The R API is similar to the Python API.
+
+.. code-block:: r
+
+    library(mlflow)
+    mlflow_search_runs(
+        filter = "metrics.rmse < 0.9 and tags.production = 'true'",
+        experiment_ids = as.character(1:2),
+        order_by = "params.lr DESC"
+    )
+
 Java
 ^^^^
 The Java API is similar to the Python API.
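For symmetry with the new R section (not part of this diff), a minimal sketch of the same query through the Python client, assuming `MlflowClient.search_runs` takes `order_by` as a list, as in recent 1.x releases:

```python
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

# Mirrors the R example: same filter, experiments "1" and "2", ordered by lr.
runs = MlflowClient().search_runs(
    experiment_ids=["1", "2"],
    filter_string="metrics.rmse < 0.9 and tags.production = 'true'",
    run_view_type=ViewType.ACTIVE_ONLY,
    order_by=["params.lr DESC"],
)
```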
4 changes: 2 additions & 2 deletions docs/source/tracking.rst
@@ -238,9 +238,9 @@ Autologging captures the following information:
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
 | Framework | Metrics | Parameters | Tags | Artifacts |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
-| Keras | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | Model summary | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model) on training end |
+| Keras | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | -- | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model) on training end |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
-| ``tf.keras`` | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | Model summary | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model), TensorBoard logs on training end |
+| ``tf.keras`` | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | -- | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model), TensorBoard logs on training end |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
 | ``tf.estimator`` | TensorBoard metrics | steps, max_steps | -- | `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (TF saved model) on call to ``tf.estimator.export_saved_model`` |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
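As a usage sketch of the table above (not part of this diff), enabling Keras autologging is a single call before training; the model and data here are illustrative:

```python
import numpy as np
import mlflow.keras
from keras.models import Sequential
from keras.layers import Dense

mlflow.keras.autolog()  # patches fit() to log the metrics/params/artifacts listed above

model = Sequential([Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
# Training and validation losses land in the active MLflow run automatically.
model.fit(np.random.rand(32, 4), np.random.rand(32, 1), epochs=2, validation_split=0.25)
```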
2 changes: 2 additions & 0 deletions docs/source/tutorials-and-examples/index.rst
@@ -33,6 +33,8 @@ Below, you can find a number of tutorials and examples for various MLflow use ca
 
 - `Glmnet (R) <https://github.com/mlflow/mlflow/tree/master/examples/r_wine>`_
 
+- `SpaCy <https://github.com/mlflow/mlflow/tree/master/examples/spacy>`_
+
 - scikit-learn
 
   + `Diabetes example <https://github.com/mlflow/mlflow/tree/master/examples/sklearn_elasticnet_diabetes>`_
10 changes: 9 additions & 1 deletion docs/theme/mlflow/static/css/theme.css
@@ -5291,7 +5291,7 @@ a .rst-content code {
 .rst-content .viewcode-link,
 .rst-content .viewcode-back {
   display: inline-block;
-  color: #27AE60;
+  color: #2980B9;
   font-size: 80%;
   padding-left: 24px
 }
@@ -5340,3 +5340,11 @@ a .rst-content code {
   src: local("Inconsolata Bold"), local("Inconsolata-Bold"), url(../fonts/Inconsolata-Bold.ttf) format("truetype")
 }
 /*# sourceMappingURL=theme.css.map */
+
+div.viewcode-block:target {
+  background-color: #d3f1f3;
+  border-top: 1px solid #44c1cb;
+  border-bottom: 1px solid #44c1cb;
+  margin: 0px -12px;
+  padding: 0px 12px;
+}
4 changes: 2 additions & 2 deletions examples/flower_classifier/README.rst
@@ -98,14 +98,14 @@ run_id ``101``.
 .. code-block:: bash
 
     # score the deployed model
-    python score_images_rest.py --model-uri runs:/101/model --port 54321 http://127.0.0.1 --data-path /path/to/images/for/scoring
+    python score_images_rest.py --host http://127.0.0.1 --port 54321 /path/to/images/for/scoring
 
 
 - To test batch scoring in Spark, run score_images_spark.py to score the model in Spark like this:
 
   .. code-block:: bash
 
-      python score_images_spark.py --model-uri runs:/101/model --data-path /path/to/images/for/scoring
+      python score_images_spark.py --model-uri runs:/101/model /path/to/images/for/scoring
13 changes: 7 additions & 6 deletions examples/flower_classifier/conda.yaml
@@ -4,10 +4,11 @@ channels:
   - anaconda
 dependencies:
   - python==3.7
-  - pandas
-  - scikit-learn
-  - tensorflow-mkl
-  - keras
+  - pandas==1.0.3
+  - scikit-learn==0.22.1
+  - tensorflow==1.13.1
+  - keras==2.3.1
+  - pillow==7.0.0
+  - pip==20.0.2
   - pip:
-    - mlflow>=1.0
-    - pillow
+    - mlflow>=1.6
6 changes: 5 additions & 1 deletion examples/flower_classifier/image_pyfunc.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import PIL
 from PIL import Image
+import pip
 import yaml
 import tensorflow as tf
 
@@ -128,6 +129,7 @@ def log_model(keras_model, artifact_path, image_dims, domain):
                           keras_version=keras.__version__,
                           tf_name=tf.__name__,  # can have optional -gpu suffix
                           tf_version=tf.__version__,
+                          pip_version=pip.__version__,
                           pillow_version=PIL.__version__))
 
     mlflow.pyfunc.log_model(artifact_path=artifact_path,
@@ -165,6 +167,8 @@ def _load_pyfunc(path):
     - python=={python_version}
     - keras=={keras_version}
     - {tf_name}=={tf_version}
+    - pip=={pip_version}
+    - pillow=={pillow_version}
     - pip:
-      - pillow=={pillow_version}
+      - mlflow>=1.6
 """
14 changes: 7 additions & 7 deletions examples/flower_classifier/score_images_rest.py
@@ -14,12 +14,12 @@
 from mlflow.utils import cli_args
 
 
-def score_model(path, uri, port):
+def score_model(path, host, port):
     """
     Score images on the local path with MLflow model deployed at given uri and port.
 
     :param path: Path to a single image file or a directory of images.
-    :param uri: URI the model is deployed at
+    :param host: host the model is deployed at
     :param port: Port the model is deployed at.
     :return: Server response.
     """
@@ -36,7 +36,7 @@ def read_image(x):
     data = pd.DataFrame(data=[base64.encodebytes(read_image(x)) for x in filenames],
                         columns=["image"]).to_json(orient="split")
 
-    response = requests.post(url='{uri}:{port}/invocations'.format(uri=uri, port=port),
+    response = requests.post(url='{host}:{port}/invocations'.format(host=host, port=port),
                              data=data,
                              headers={"Content-Type": "application/json; format=pandas-split"})
 
@@ -50,14 +50,14 @@ def read_image(x):
 
 @click.command(help="Score images.")
 @click.option("--port", type=click.INT, default=80, help="Port at which the model is deployed.")
-@cli_args.MODEL_URI
-@click.argument("--data-path", "-d")
-def run(data_path, model_uri, port):
+@cli_args.HOST
+@click.argument("data-path")
+def run(data_path, host, port):
     """
     Score images with MLflow model deployed at given uri and port and print out the response
    to standard out.
     """
-    print(score_model(data_path, model_uri, port).text)
+    print(score_model(data_path, host, port).text)
 
 
 if __name__ == '__main__':
4 changes: 2 additions & 2 deletions examples/flower_classifier/score_images_spark.py
@@ -69,8 +69,8 @@ def score_model(spark, data_path, model_uri):
 
 @click.command(help="Score images.")
 @cli_args.MODEL_URI
-@click.argument("--data-path", "-d")
-def run(model_uri, data_path):
+@click.argument("data-path")
+def run(data_path, model_uri):
     with pyspark.sql.SparkSession.builder \
             .config(key="spark.python.worker.reuse", value=True) \
             .config(key="spark.ui.enabled", value=False) \
7 changes: 7 additions & 0 deletions examples/spacy/MLproject
@@ -0,0 +1,7 @@
name: spacy_ner_example

conda_env: conda.yaml

entry_points:
  main:
    command: "python train.py"
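For reference (not part of this diff), a minimal sketch of launching this new project through the projects API, equivalent to `mlflow run examples/spacy` and assuming it is invoked from the repository root:

```python
import mlflow

# Resolves the MLproject file, builds the conda env, and runs `python train.py`.
submitted = mlflow.projects.run(uri="examples/spacy")
print(submitted.run_id)
```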
10 changes: 10 additions & 0 deletions examples/spacy/conda.yaml
@@ -0,0 +1,10 @@
name: spacy-example
channels:
  - defaults
  - anaconda
dependencies:
  - python==3.6
  - spacy=2.2.3
  - pip:
    - mlflow>=1.0

65 changes: 65 additions & 0 deletions examples/spacy/train.py
@@ -0,0 +1,65 @@
from __future__ import print_function

import random

import spacy
from spacy.util import minibatch, compounding

import mlflow.spacy

# training data
TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
]

if __name__ == "__main__":
    # Adaptation of spaCy example: https://github.com/explosion/spaCy/blob/master/examples/training/train_ner.py

    # create blank model and add ner to the pipeline
    nlp = spacy.blank("en")
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner, last=True)

    # add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    params = {
        'n_iter': 100,
        'drop': 0.5
    }
    mlflow.log_params(params)

    nlp.begin_training()
    for itn in range(params['n_iter']):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(
                texts,  # batch of texts
                annotations,  # batch of annotations
                drop=params['drop'],  # dropout - make it harder to memorise data
                losses=losses,
            )
        print("Losses", losses)
        mlflow.log_metrics(losses)

    # Log the spaCy model using mlflow
    mlflow.spacy.log_model(spacy_model=nlp, artifact_path='model')
    model_uri = "runs:/{run_id}/{artifact_path}".format(
        run_id=mlflow.active_run().info.run_id,
        artifact_path='model')

    print("Model saved in run %s" % mlflow.active_run().info.run_uuid)

    # Load the model using mlflow and use it to predict data
    nlp2 = mlflow.spacy.load_model(model_uri=model_uri)
    for text, _ in TRAIN_DATA:
        doc = nlp2(text)
        print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
        print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])