diff --git a/mlflow-pytorch/iris/Dockerfile b/mlflow-pytorch/iris/Dockerfile new file mode 100644 index 0000000..ce6ea33 --- /dev/null +++ b/mlflow-pytorch/iris/Dockerfile @@ -0,0 +1,60 @@ +# Build an image that can serve mlflow models. +FROM ubuntu:18.04 +RUN apt-get -y update +RUN apt-get install -y --no-install-recommends \ + wget \ + curl \ + nginx \ + ca-certificates \ + bzip2 \ + build-essential \ + cmake \ + openjdk-8-jdk \ + git-core \ + maven \ + && rm -rf /var/lib/apt/lists/* + +# Setup miniconda +RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh +RUN bash ./miniconda.sh -b -p /miniconda && rm ./miniconda.sh +ENV PATH="/miniconda/bin:$PATH" + +ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 +ENV GUNICORN_CMD_ARGS="--timeout 60 -k gevent" +# Set up the program in the image +WORKDIR /opt/mlflow + +RUN pip install mlflow==1.26.1 +RUN mvn --batch-mode dependency:copy -Dartifact=org.mlflow:mlflow-scoring:1.26.1:pom -DoutputDirectory=/opt/java +RUN mvn --batch-mode dependency:copy -Dartifact=org.mlflow:mlflow-scoring:1.26.1:jar -DoutputDirectory=/opt/java/jars +RUN cp /opt/java/mlflow-scoring-1.26.1.pom /opt/java/pom.xml +RUN cd /opt/java && mvn --batch-mode dependency:copy-dependencies -DoutputDirectory=/opt/java/jars + +ARG MODEL_PATH +COPY $MODEL_PATH/model /opt/ml/model + +RUN echo 'import yaml\n\ +with open(r"/opt/ml/model/conda.yaml") as file:\n\ + f = yaml.load(file, Loader=yaml.FullLoader)\n\ +for index, item in enumerate(f["dependencies"]):\n\ + if type(item) is dict and item.get("pip") != None:\n\ + f["dependencies"][index]["pip"].append("protobuf==3.19.4")\n\ + with open(r"/opt/ml/model/conda.yaml", "w") as file:\n\ + yaml.dump(f, file)' >> /tmp/update_conda_yaml.py +RUN python /tmp/update_conda_yaml.py + +RUN python -c \ + 'from mlflow.models.container import _install_pyfunc_deps;\ + _install_pyfunc_deps(\ + "/opt/ml/model", \ + install_mlflow=False, \ + enable_mlserver=False, \ + env_manager="conda")' 
+ENV MLFLOW_DISABLE_ENV_CREATION="true" +ENV ENABLE_MLSERVER="False" + +# granting read/write access and conditional execution authority to all child directories +# and files to allow for deployment to AWS Sagemaker Serverless Endpoints +# (see https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html) +RUN chmod o+rwX /opt/mlflow/ +ENTRYPOINT ["python", "-c", "from mlflow.models import container as C;C._serve('conda')"] diff --git a/mlflow-pytorch/iris/README.md b/mlflow-pytorch/iris/README.md new file mode 100644 index 0000000..c488056 --- /dev/null +++ b/mlflow-pytorch/iris/README.md @@ -0,0 +1,44 @@ +Example taken from https://github.com/mlflow/mlflow/tree/master/examples/pytorch/torchscript/IrisClassification + +### Setup +1. Create a code with url- https://github.com/rahul-179/dkubeio-examples/tree/mlflow/mlflow branch `rm-mlflow` + +### Training from VS code +1. Create a vs code IDE with pytorch 1.6 cpu image +2. cd to the code directory where we have the conda.yaml file +3. conda env create -f conda.yaml +4. conda activate pytorch-example +5. python iris_classification.py + +## Training outside VS code +1. Install Conda v4.9 or latest +2. Clone this repo and update the conda.yaml file +3. conda env create -f conda.yaml +4. conda activate pytorch-example +5. python iris_classification.py + +## Building Image outside dkube +1. Download the model to local directory +``` +mlflow artifacts download -r -d +eg: mlflow artifacts download -r c263bdaa-9505-4dd5-81fa-f9dbf40190fc -d ./output +``` +2. Update the conda.yaml file in the downloaded path and add protobuf==3.19.4 in pip dependencies +3. Run the below command to build the image +``` +mlflow models build-docker -n -m +eg: mlflow models build-docker -n lucifer001/mlflow-pytorch-demo:demo1 -m output/model +``` +4. Push the image + +### Deployment +1. Select serving image which was built in the previous step. +2. Serving Port: 8000 +3. Serving Url Prefix: /invocations +4. Min CPU/Max CPU: 1 +5. 
Min Memory/Max Memory: 5G + +### Prediction +1. Copy the curl command from the deployment page and append --insecure +2. Change the data section to +--data-raw '{ "instances": [4.4, 3, 1.3, 0.2] }' diff --git a/mlflow-pytorch/iris/conda.yaml b/mlflow-pytorch/iris/conda.yaml new file mode 100644 index 0000000..a9e3be7 --- /dev/null +++ b/mlflow-pytorch/iris/conda.yaml @@ -0,0 +1,17 @@ +name: pytorch-example +channels: +- conda-forge +dependencies: +- python=3.8.2 +- pip +- pip: + - sklearn + - cloudpickle==1.6.0 + - boto3 + - torchvision>=0.9.1 + - torch>=1.9.0 + - mlflow +variables: + MLFLOW_TRACKING_INSECURE_TLS: "true" + MLFLOW_TRACKING_URI: ":32222>" + MLFLOW_TRACKING_TOKEN: "" \ No newline at end of file diff --git a/mlflow-pytorch/iris/iris_classification.py b/mlflow-pytorch/iris/iris_classification.py new file mode 100644 index 0000000..503f249 --- /dev/null +++ b/mlflow-pytorch/iris/iris_classification.py @@ -0,0 +1,103 @@ +# pylint: disable=abstract-method +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import os +from sklearn.datasets import load_iris +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split + +import mlflow.pytorch + + +class IrisClassifier(nn.Module): + def __init__(self): + super(IrisClassifier, self).__init__() + self.fc1 = nn.Linear(4, 10) + self.fc2 = nn.Linear(10, 10) + self.fc3 = nn.Linear(10, 3) + + def forward(self, x): + x = F.relu(self.fc1(x.double())) + x = F.relu(self.fc2(x.double())) + x = F.dropout(x.double(), 0.2) + x = self.fc3(x.double()) + return x + + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def prepare_data(): + iris = load_iris() + data = iris.data + labels = iris.target + target_names = iris.target_names + + X_train, X_test, y_train, y_test = train_test_split( + data, labels, test_size=0.2, random_state=42, shuffle=True, stratify=labels + ) + + X_train = torch.FloatTensor(X_train).to(device) + X_test = 
torch.FloatTensor(X_test).to(device) + y_train = torch.LongTensor(y_train).to(device) + y_test = torch.LongTensor(y_test).to(device) + + return X_train, X_test, y_train, y_test, target_names + + +def train_model(model, epochs, X_train, y_train): + criterion = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + + for epoch in range(epochs): + out = model(X_train.double()) + loss = criterion(out, y_train).to(device) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if epoch % 10 == 0: + print("number of epoch", epoch, "loss", float(loss)) + + return model + + +def test_model(model, X_test, y_test): + model.eval() + with torch.no_grad(): + predict_out = model(X_test.double()) + _, predict_y = torch.max(predict_out, 1) + + print("\nprediction accuracy", float(accuracy_score(y_test.cpu(), predict_y.cpu()))) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Iris Classification Torchscripted model") + + parser.add_argument( + "--epochs", type=int, default=100, help="number of epochs to run (default: 100)" + ) + + args = parser.parse_args() + + model = IrisClassifier() + model.double() + model = model.to(device) + X_train, X_test, y_train, y_test, target_names = prepare_data() + scripted_model = torch.jit.script(model) # scripting the model + scripted_model = train_model(scripted_model, args.epochs, X_train, y_train) + test_model(scripted_model, X_test, y_test) + + with mlflow.start_run() as run: + mlflow.pytorch.log_model(scripted_model, "model") # logging scripted model + model_path = mlflow.get_artifact_uri("model") + loaded_pytorch_model = mlflow.pytorch.load_model(model_path) # loading scripted model + model.eval() + with torch.no_grad(): + test_datapoint = torch.Tensor([4.4000, 3.0000, 1.3000, 0.2000]).to(device) + prediction = loaded_pytorch_model(test_datapoint) + actual = "setosa" + predicted = target_names[torch.argmax(prediction)] + print("\nPREDICTION RESULT: ACTUAL: {}, PREDICTED: 
{}".format(actual, predicted)) diff --git a/mlflow/README.md b/mlflow/README.md index 2c11fff..2ab1b85 100644 --- a/mlflow/README.md +++ b/mlflow/README.md @@ -3,26 +3,66 @@ Example taken from https://github.com/mlflow/mlflow/tree/master/examples/tensorf ### Setup 1. Create a code with url- https://github.com/oneconvergence/dkubeio-examples/tree/mlflow/mlflow branch -mlflow -### Traning from VS code +### Training with Conda environment 1. Create a vs code IDE with tensorflow 2.6.0 cpu image 2. cd to the code directory where we have the conda.yaml file and update conda.yaml. 3. conda env create -f conda.yaml 4. conda activate tensorflow-example 5. python train_predict.py -### Traning outside dkube -1. Install Conda v4.9 or latest -2. Clone this repo and update the conda.yaml file -3. conda env create -f conda.yaml -4. conda activate tensorflow-example -5. python train_predict.py +### Training from VS code inside DKube +1. Create a vs code IDE with tensorflow 2.6.0 cpu image +2. cd to the code directory where we have the requirements.txt file +3. sudo apt-get update -y; sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \ +libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev \ +libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python-openssl +4. curl https://pyenv.run | bash +5. exec "$SHELL" +6. export PATH=$HOME/.pyenv/bin:$PATH +7. pyenv install 3.7.2 +8. pyenv virtualenv 3.7.2 env +9. source $HOME/.pyenv/versions/env/bin/activate +10. pip install --upgrade pip +11. pip install -r requirements.txt +12. export MLFLOW_TRACKING_INSECURE_TLS="true" +13. export MLFLOW_TRACKING_URI=":32222>" +14. export MLFLOW_TRACKING_TOKEN="" +15. python train_predict.py -### Building Image in dkube +### Training from VS code outside DKube +2. cd to the code directory where we have the requirements.txt file +3. pip3 install virtualenv +4. export PATH=$PATH:$HOME/.local/bin +5. export MLFLOW_TRACKING_INSECURE_TLS="true" +6. 
export MLFLOW_TRACKING_URI=":32222>" +7. export MLFLOW_TRACKING_TOKEN="" +8. virtualenv -p python3 env +9. . env/bin/activate +10. pip3 install -r requirements.txt +11. python train_predict.py + +`Note: Python 3.7 or higher version is required` + +### Building Image inside DKube 1. Go to the model details page which was given as output in the above training run. A new version will be there in the version list. 2. Click on the build model image icon which is on the version's row at the right. 3. Select code 4. Select registry 5. Submit to create image build + +### Building Image outside DKube +1. Download the model to local directory +``` +mlflow artifacts download -r -d +eg: mlflow artifacts download -r c263bdaa-9505-4dd5-81fa-f9dbf40190fc -d ./output +``` +2. Update the conda.yaml file in the downloaded path and add protobuf==3.19.4 in pip dependencies +3. Run the below command to build the image +``` +mlflow models build-docker -n -m /decision-tree-classifier +eg: mlflow models build-docker -n lucifer001/mlflow-sklearn-demo:demo1 -m output/decision-tree-classifier +``` +4. Push the image ## Building Image outside dkube 1. Download the model to local directory @@ -52,4 +92,3 @@ eg: mlflow models build-docker -n lucifer001/mlflow-sklearn-demo:demo1 -m output "columns": ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"], "data": [[5.1, 3.3, 1.7, 0.5], [5.9, 3.0, 4.2, 1.5], [6.9, 3.1, 5.4, 2.1]] }' - diff --git a/mlflow/requirements.txt b/mlflow/requirements.txt new file mode 100644 index 0000000..45278e1 --- /dev/null +++ b/mlflow/requirements.txt @@ -0,0 +1,5 @@ +mlflow +tensorflow==2.0.0 +protobuf==3.19.4 +git+https://github.com/oneconvergence/dkube.git@3.3.mp +boto3 \ No newline at end of file diff --git a/mlflow/train_predict.py b/mlflow/train_predict.py index c4a817a..51a0f11 100644 --- a/mlflow/train_predict.py +++ b/mlflow/train_predict.py @@ -176,4 +176,3 @@ def main(argv): if __name__ == "__main__": main(sys.argv) -