10 changes: 5 additions & 5 deletions .github/stale.yml
@@ -1,11 +1,11 @@
 # Configuration for probot-stale - https://github.com/probot/stale
 
 # Number of days of inactivity before an Issue or Pull Request becomes stale
-daysUntilStale: 365
+daysUntilStale: 10000
 
 # Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
 # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
-daysUntilClose: 730
+daysUntilClose: false
 
 # Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled)
 onlyLabels: []
@@ -25,7 +25,7 @@ exemptMilestones: false
 exemptAssignees: false
 
 # Label to use when marking as stale
-staleLabel: stale
+staleLabel: Needs-Author-Feedback
 
 # Comment to post when marking as stale. Set to `false` to disable
 markComment: >
@@ -34,8 +34,8 @@ markComment: >
   for your contributions.
 
 # Comment to post when removing the stale label.
-unmarkComment: >
-  Thank you for interacting with this issue! Removing the stale label!
+#unmarkComment: >
+#  Thank you for interacting with this issue! Removing the stale label!
 
 # Comment to post when closing a stale Issue or Pull Request.
 closeComment: >
20 changes: 0 additions & 20 deletions .github/workflows/jira.pr.yml

This file was deleted.

27 changes: 27 additions & 0 deletions .github/workflows/master.yml
@@ -0,0 +1,27 @@
name: MLflow tests

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  python-small:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.6
        uses: actions/setup-python@v1
        with:
          python-version: 3.6
      - name: Install dependencies
        run: |
          export GITHUB_WORKFLOW=1
          INSTALL_SMALL_PYTHON_DEPS=true source ./travis/install-common-deps.sh
      - name: Run tests
        run: |
          export GITHUB_WORKFLOW=1
          export PATH="$HOME/miniconda/bin:$PATH"
          source activate test-environment
          ./travis/run-small-python-tests.sh
2 changes: 1 addition & 1 deletion .travis.yml
@@ -84,7 +84,7 @@ matrix:
       - pip install -r travis/small-requirements.txt
       - pip install -e .
     script:
-      - pytest --verbose --ignore=tests/h2o --ignore=tests/keras --ignore=tests/pytorch --ignore=tests/pyfunc --ignore=tests/sagemaker --ignore=tests/sklearn --ignore=tests/spark --ignore=tests/tensorflow --ignore=tests/keras_autolog --ignore=tests/tensorflow_autolog --ignore tests/azureml --ignore tests/onnx --ignore tests/projects --ignore=tests/xgboost --ignore=tests/lightgbm --ignore=tests/spark_autologging tests
+      - pytest --verbose --ignore=tests/h2o --ignore=tests/keras --ignore=tests/pytorch --ignore=tests/pyfunc --ignore=tests/sagemaker --ignore=tests/sklearn --ignore=tests/spark --ignore=tests/tensorflow --ignore=tests/keras_autolog --ignore=tests/tensorflow_autolog --ignore tests/azureml --ignore tests/onnx --ignore tests/projects --ignore=tests/xgboost --ignore=tests/lightgbm --ignore=tests/spark_autologging --ignore=tests/spacy tests
   - language: python
     python: 3.6
     name: "Docs (rsthtml, javadocs)"
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -34,6 +34,7 @@
 # ones.
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
     'sphinx.ext.napoleon',
     'sphinx_click.ext',
 ]
3 changes: 2 additions & 1 deletion docs/source/model-registry.rst
@@ -95,6 +95,7 @@ There are three programmatic ways to add a model to the registry. First, you can
     from sklearn.ensemble import RandomForestRegressor
 
     import mlflow
+    import mlflow.sklearn
 
     with mlflow.start_run(run_name="YOUR_RUN_NAME") as run:
         params = {"n_estimators": 5, "random_state": 42}
@@ -103,7 +104,7 @@ There are three programmatic ways to add a model to the registry. First, you can
         # Log parameters and metrics using the MLflow APIs
         mlflow.log_params(params)
         mlflow.log_param("param_1", randint(0, 100))
-        mlflow.log_metrics({"metric_1": random(), "metric_2", random() + 1})
+        mlflow.log_metrics({"metric_1": random(), "metric_2": random() + 1})
 
         # Log the sklearn model and register as version 1
         mlflow.sklearn.log_model(
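For reference (not part of this diff): the hunk above is folded before the end of the logging call. A minimal self-contained sketch of the registration step it leads into, assuming the `registered_model_name` argument of `mlflow.sklearn.log_model`; the model and names here are illustrative:

```python
from sklearn.ensemble import RandomForestRegressor

import mlflow
import mlflow.sklearn

with mlflow.start_run():
    rf = RandomForestRegressor(n_estimators=5, random_state=42)
    # Passing registered_model_name both stores the artifact under the run
    # and creates version 1 of the named model in the registry.
    mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="sklearn-model",
        registered_model_name="sk-learn-random-forest-reg-model",  # illustrative name
    )
```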
5 changes: 4 additions & 1 deletion docs/source/models.rst
@@ -561,7 +561,10 @@ Example requests:
     }'
 
     # record-oriented (fine for vector rows, loses ordering for JSON records)
-    curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json; format=pandas-records' -d '[[1, 2, 3], [4, 5, 6]]'
+    curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json; format=pandas-records' -d '[
+        {"a": 1,"b": 2,"c": 3},
+        {"a": 4,"b": 5,"c": 6}
+    ]'
 
 
 For more information about serializing pandas DataFrames, see
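For comparison (not part of this diff), a minimal sketch of the same record-oriented request issued from Python rather than curl, assuming a scoring server listening on 127.0.0.1:5000 as in the examples above:

```python
import requests

# Record-oriented payload: one JSON object per row, keyed by column name.
records = [
    {"a": 1, "b": 2, "c": 3},
    {"a": 4, "b": 5, "c": 6},
]
response = requests.post(
    "http://127.0.0.1:5000/invocations",
    json=records,
    # The explicit header overrides the default Content-Type set by json=...
    headers={"Content-Type": "application/json; format=pandas-records"},
)
print(response.text)
```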
13 changes: 13 additions & 0 deletions docs/source/search-syntax.rst
@@ -185,6 +185,19 @@ To search all known experiments for any MLflow runs created using the Inception
     all_experiments = [exp.experiment_id for exp in MlflowClient().list_experiments()]
     runs = MlflowClient().search_runs(experiment_ids=all_experiments, filter_string="params.model = 'Inception'", run_view_type=ViewType.ALL)
 
+R
+^^^^^^
+The R API is similar to the Python API.
+
+.. code-block:: r
+
+    library(mlflow)
+    mlflow_search_runs(
+        filter = "metrics.rmse < 0.9 and tags.production = 'true'",
+        experiment_ids = as.character(1:2),
+        order_by = "params.lr DESC"
+    )
+
 Java
 ^^^^
 The Java API is similar to the Python API.
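For symmetry with the new R section (not part of this diff), a minimal sketch of the same query through the Python client, assuming `MlflowClient.search_runs` takes `order_by` as a list, as in recent 1.x releases:

```python
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

# Mirrors the R example: same filter, experiments "1" and "2", ordered by lr.
runs = MlflowClient().search_runs(
    experiment_ids=["1", "2"],
    filter_string="metrics.rmse < 0.9 and tags.production = 'true'",
    run_view_type=ViewType.ACTIVE_ONLY,
    order_by=["params.lr DESC"],
)
```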
4 changes: 2 additions & 2 deletions docs/source/tracking.rst
@@ -238,9 +238,9 @@ Autologging captures the following information:
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
 | Framework | Metrics | Parameters | Tags | Artifacts |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
-| Keras | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | Model summary | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model) on training end |
+| Keras | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | -- | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model) on training end |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
-| ``tf.keras`` | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | Model summary | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model), TensorBoard logs on training end |
+| ``tf.keras`` | Training loss; validation loss; user-specified metrics | ``fit()`` parameters; optimizer name; learning rate; epsilon | -- | Model summary on training start; `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (Keras model), TensorBoard logs on training end |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
 | ``tf.estimator`` | TensorBoard metrics | steps, max_steps | -- | `MLflow Model <https://mlflow.org/docs/latest/models.html>`_ (TF saved model) on call to ``tf.estimator.export_saved_model`` |
 +------------------+--------------------------------------------------------+--------------------------------------------------------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
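As a usage sketch of the table above (not part of this diff), enabling Keras autologging is a single call before training; the model and data here are illustrative:

```python
import numpy as np
import mlflow.keras
from keras.models import Sequential
from keras.layers import Dense

mlflow.keras.autolog()  # patches fit() to log the metrics/params/artifacts listed above

model = Sequential([Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
# Training and validation losses land in the active MLflow run automatically.
model.fit(np.random.rand(32, 4), np.random.rand(32, 1), epochs=2, validation_split=0.25)
```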
2 changes: 2 additions & 0 deletions docs/source/tutorials-and-examples/index.rst
@@ -33,6 +33,8 @@ Below, you can find a number of tutorials and examples for various MLflow use ca
 
 - `Glmnet (R) <https://github.com/mlflow/mlflow/tree/master/examples/r_wine>`_
 
+- `SpaCy <https://github.com/mlflow/mlflow/tree/master/examples/spacy>`_
+
 - scikit-learn
 
   + `Diabetes example <https://github.com/mlflow/mlflow/tree/master/examples/sklearn_elasticnet_diabetes>`_
10 changes: 9 additions & 1 deletion docs/theme/mlflow/static/css/theme.css
@@ -5291,7 +5291,7 @@ a .rst-content code {
 .rst-content .viewcode-link,
 .rst-content .viewcode-back {
   display: inline-block;
-  color: #27AE60;
+  color: #2980B9;
   font-size: 80%;
   padding-left: 24px
 }
@@ -5340,3 +5340,11 @@ a .rst-content code {
   src: local("Inconsolata Bold"), local("Inconsolata-Bold"), url(../fonts/Inconsolata-Bold.ttf) format("truetype")
 }
 /*# sourceMappingURL=theme.css.map */
+
+div.viewcode-block:target {
+  background-color: #d3f1f3;
+  border-top: 1px solid #44c1cb;
+  border-bottom: 1px solid #44c1cb;
+  margin: 0px -12px;
+  padding: 0px 12px;
+}
4 changes: 2 additions & 2 deletions examples/flower_classifier/README.rst
@@ -98,14 +98,14 @@ run_id ``101``.
 .. code-block:: bash
 
     # score the deployed model
-    python score_images_rest.py --model-uri runs:/101/model --port 54321 http://127.0.0.1 --data-path /path/to/images/for/scoring
+    python score_images_rest.py --host http://127.0.0.1 --port 54321 /path/to/images/for/scoring
 
 
 - To test batch scoring in Spark, run score_images_spark.py to score the model in Spark like this:
 
   .. code-block:: bash
 
-      python score_images_spark.py --model-uri runs:/101/model --data-path /path/to/images/for/scoring
+      python score_images_spark.py --model-uri runs:/101/model /path/to/images/for/scoring
13 changes: 7 additions & 6 deletions examples/flower_classifier/conda.yaml
@@ -4,10 +4,11 @@ channels:
   - anaconda
 dependencies:
   - python==3.7
-  - pandas
-  - scikit-learn
-  - tensorflow-mkl
-  - keras
+  - pandas==1.0.3
+  - scikit-learn==0.22.1
+  - tensorflow==1.13.1
+  - keras==2.3.1
+  - pillow==7.0.0
+  - pip==20.0.2
   - pip:
-    - mlflow>=1.0
-    - pillow
+    - mlflow>=1.6
6 changes: 5 additions & 1 deletion examples/flower_classifier/image_pyfunc.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import PIL
 from PIL import Image
+import pip
 import yaml
 import tensorflow as tf
 
@@ -128,6 +129,7 @@ def log_model(keras_model, artifact_path, image_dims, domain):
                           keras_version=keras.__version__,
                           tf_name=tf.__name__,  # can have optional -gpu suffix
                           tf_version=tf.__version__,
+                          pip_version=pip.__version__,
                           pillow_version=PIL.__version__))
 
     mlflow.pyfunc.log_model(artifact_path=artifact_path,
@@ -165,6 +167,8 @@ def _load_pyfunc(path):
     - python=={python_version}
     - keras=={keras_version}
     - {tf_name}=={tf_version}
+    - pip=={pip_version}
+    - pillow=={pillow_version}
     - pip:
-      - pillow=={pillow_version}
+      - mlflow>=1.6
 """
14 changes: 7 additions & 7 deletions examples/flower_classifier/score_images_rest.py
@@ -14,12 +14,12 @@
 from mlflow.utils import cli_args
 
 
-def score_model(path, uri, port):
+def score_model(path, host, port):
     """
     Score images on the local path with MLflow model deployed at given uri and port.
 
     :param path: Path to a single image file or a directory of images.
-    :param uri: URI the model is deployed at
+    :param host: host the model is deployed at
     :param port: Port the model is deployed at.
     :return: Server response.
     """
@@ -36,7 +36,7 @@ def read_image(x):
     data = pd.DataFrame(data=[base64.encodebytes(read_image(x)) for x in filenames],
                         columns=["image"]).to_json(orient="split")
 
-    response = requests.post(url='{uri}:{port}/invocations'.format(uri=uri, port=port),
+    response = requests.post(url='{host}:{port}/invocations'.format(host=host, port=port),
                              data=data,
                              headers={"Content-Type": "application/json; format=pandas-split"})
 
@@ -50,14 +50,14 @@ def read_image(x):
 
 @click.command(help="Score images.")
 @click.option("--port", type=click.INT, default=80, help="Port at which the model is deployed.")
-@cli_args.MODEL_URI
-@click.argument("--data-path", "-d")
-def run(data_path, model_uri, port):
+@cli_args.HOST
+@click.argument("data-path")
+def run(data_path, host, port):
     """
     Score images with MLflow model deployed at given uri and port and print out the response
    to standard out.
     """
-    print(score_model(data_path, model_uri, port).text)
+    print(score_model(data_path, host, port).text)
 
 
 if __name__ == '__main__':
4 changes: 2 additions & 2 deletions examples/flower_classifier/score_images_spark.py
@@ -69,8 +69,8 @@ def score_model(spark, data_path, model_uri):
 
 @click.command(help="Score images.")
 @cli_args.MODEL_URI
-@click.argument("--data-path", "-d")
-def run(model_uri, data_path):
+@click.argument("data-path")
+def run(data_path, model_uri):
     with pyspark.sql.SparkSession.builder \
             .config(key="spark.python.worker.reuse", value=True) \
             .config(key="spark.ui.enabled", value=False) \
7 changes: 7 additions & 0 deletions examples/spacy/MLproject
@@ -0,0 +1,7 @@
name: spacy_ner_example

conda_env: conda.yaml

entry_points:
  main:
    command: "python train.py"
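For reference (not part of this diff), a minimal sketch of launching this new project through the projects API, equivalent to `mlflow run examples/spacy` and assuming it is invoked from the repository root:

```python
import mlflow

# Resolves the MLproject file, builds the conda env, and runs `python train.py`.
submitted = mlflow.projects.run(uri="examples/spacy")
print(submitted.run_id)
```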
10 changes: 10 additions & 0 deletions examples/spacy/conda.yaml
@@ -0,0 +1,10 @@
name: spacy-example
channels:
  - defaults
  - anaconda
dependencies:
  - python==3.6
  - spacy=2.2.3
  - pip:
    - mlflow>=1.0

65 changes: 65 additions & 0 deletions examples/spacy/train.py
@@ -0,0 +1,65 @@
from __future__ import print_function

import random

import spacy
from spacy.util import minibatch, compounding

import mlflow.spacy

# training data
TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
]

if __name__ == "__main__":
    # Adaptation of spaCy example: https://github.com/explosion/spaCy/blob/master/examples/training/train_ner.py

    # create blank model and add ner to the pipeline
    nlp = spacy.blank("en")
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner, last=True)

    # add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    params = {
        'n_iter': 100,
        'drop': 0.5
    }
    mlflow.log_params(params)

    nlp.begin_training()
    for itn in range(params['n_iter']):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(
                texts,  # batch of texts
                annotations,  # batch of annotations
                drop=params['drop'],  # dropout - make it harder to memorise data
                losses=losses,
            )
        print("Losses", losses)
        mlflow.log_metrics(losses)

    # Log the spaCy model using mlflow
    mlflow.spacy.log_model(spacy_model=nlp, artifact_path='model')
    model_uri = "runs:/{run_id}/{artifact_path}".format(
        run_id=mlflow.active_run().info.run_id,
        artifact_path='model')

    print("Model saved in run %s" % mlflow.active_run().info.run_uuid)

    # Load the model using mlflow and use it to predict data
    nlp2 = mlflow.spacy.load_model(model_uri=model_uri)
    for text, _ in TRAIN_DATA:
        doc = nlp2(text)
        print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
        print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])