Conda dependency updates + doc updates #236

Open · wants to merge 2 commits into master
7 changes: 3 additions & 4 deletions Makefile
@@ -1,10 +1,9 @@
 # create env
-conda env create -f environment.yml
+conda env create -f environment.yml # --force # -vvv

 # create directories
 mkdir data
-mkdir data/raw data/meta data/experiments
+mkdir -p data/raw data/meta data/experiments

 # set default env variable for NEPTUNE_API_TOKEN and CONFIG_PATH
 export NEPTUNE_API_TOKEN=eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiJiNzA2YmM4Zi03NmY5LTRjMmUtOTM5ZC00YmEwMzZmOTMyZTQifQ==
-export CONFIG_PATH=neptune.yaml
+export CONFIG_PATH=neptune.yaml
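As a side note, `export` only affects the current shell session; a small in-process check (a sketch, not part of this PR) can confirm both variables actually reached the pipeline:

```python
import os

# Sketch (not part of this PR): fail fast if the variables exported above
# are missing from the environment the pipeline runs in.
missing = [var for var in ("NEPTUNE_API_TOKEN", "CONFIG_PATH") if var not in os.environ]
if missing:
    raise EnvironmentError(f"missing environment variables: {', '.join(missing)}")
```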
5 changes: 3 additions & 2 deletions REPRODUCE_RESULTS.md
@@ -97,7 +97,7 @@ project
 |-- experiments
     |-- mapping_challenge_baseline # this is where your experiment files will be dumped
         |-- checkpoints # neural network checkpoints
-        |-- transformers # serialized transformers after fitting
+        |-- transformers # serialized transformers after fitting ('unet' and 'scoring_model' go here!)
         |-- outputs # outputs of transformers if you specified save_output=True anywhere
         |-- prediction.json # prediction on valid
```
@@ -135,7 +135,8 @@ python main.py train --pipeline_name unet_weighted

**NOTE**

-Model weights for the winning solution are available [here](https://ui.neptune.ai/o/neptune-ai/org/Mapping-Challenge/e/MC-1057/artifacts)
+Model weights for the winning solution are available [here](https://ui.neptune.ai/o/neptune-ai/org/Mapping-Challenge/e/MC-1057/artifacts).
+They must be placed in the `./data/experiments/mapping_challenge_baseline/transformers/` folder.

### Second level model (optional)
This will train a LightGBM model to find the best threshold.
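Following the note above about where the downloaded weights belong, a short check (a sketch, not part of this PR) can catch a misplaced download early; the 'unet' and 'scoring_model' names come from the updated tree comment:

```python
from pathlib import Path

# Sketch: confirm the downloaded weights landed where the pipeline expects
# them; 'unet' and 'scoring_model' follow the tree comment above.
transformers_dir = Path("data/experiments/mapping_challenge_baseline/transformers")
missing = [name for name in ("unet", "scoring_model") if not (transformers_dir / name).exists()]
if missing:
    raise FileNotFoundError(f"missing from {transformers_dir}: {missing}")
```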
13 changes: 6 additions & 7 deletions environment.yml
@@ -1,27 +1,26 @@
 name: mapping

 dependencies:
-- pip=19.1.1
+- pip=21.0.1
 - python=3.6.8
 - psutil
 - matplotlib
 - scikit-image
 - lightgbm=2.2.1
+- cython=0.28.2

 - pip:
   - click==6.7
   - tqdm==4.23.0
   - pydot_ng==1.0.0
   - git+https://github.com/lucasb-eyer/pydensecrf.git
   - xgboost==0.90
-  - neptune-client==0.3.0
-  - neptune-contrib==0.9.2
+  - neptune-client==0.3.0 # installs click==7.1.2!
+  - neptune-contrib==0.9.2 # downgrades joblib 1.0.1->0.13.2, Pillow 8.1.1->5.4.1, matplotlib 3.3.4->3.2.2
   - imgaug==0.2.5
   - opencv_python==3.4.0.12
   - torch==0.3.1
   - torchvision==0.2.0
   - pretrainedmodels==0.7.0
-  - pandas==0.24.2
-  - numpy==1.16.4
-  - cython==0.28.2
+  - pandas==1.1.5 # cannot install 0.24.2; other packages require 1.1.5
+  - numpy==1.16.4 # other installs pull 1.19.2, but the 'squeeze' method is removed in later versions; this version is required
   - pycocotools==2.0.0
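Because these pins interact (pip resolves some of them against each other), a quick runtime check, sketched below and not part of this PR, can confirm the environment actually resolved to the intended versions:

```python
# Sketch: confirm the environment resolved to the pinned versions above.
import numpy as np
import pandas as pd

EXPECTED = {"numpy": "1.16.4", "pandas": "1.1.5"}

for name, module in (("numpy", np), ("pandas", pd)):
    if module.__version__ != EXPECTED[name]:
        print(f"warning: {name} is {module.__version__}, expected {EXPECTED[name]}")
```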
11 changes: 11 additions & 0 deletions run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Provided to facilitate running the 'Predict on new data' step in the REPRODUCE file.
+export NEPTUNE_API_TOKEN=eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiJiNzA2YmM4Zi03NmY5LTRjMmUtOTM5ZC00YmEwMzZmOTMyZTQifQ==
Collaborator comment on the `NEPTUNE_API_TOKEN` line above:
Which API token is it?
Is it ANONYMOUS for the public user 'neptuner'?

+export CONFIG_PATH=neptune.yaml
+
+python main.py predict-on-dir \
+    --pipeline_name unet_tta_scoring_model \
+    --chunk_size 1000 \
+    --dir_path images/ \
+    --prediction_path images/predictions.json
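Once the script finishes, `images/predictions.json` can be inspected directly. Assuming it follows the COCO results format used with pycocotools elsewhere in this repo (an assumption, not verified here), a minimal peek might be:

```python
import json

# Sketch (assumes COCO-style results: a list of dicts with image_id,
# category_id, segmentation and score fields).
with open("images/predictions.json") as f:
    predictions = json.load(f)

print(f"{len(predictions)} predicted instances")
print(predictions[0].keys() if predictions else "no predictions")
```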
2 changes: 1 addition & 1 deletion src/loaders.py
@@ -10,7 +10,7 @@
 from PIL import Image
 import pandas as pd
 from torch.utils.data import Dataset, DataLoader
-from sklearn.externals import joblib
+import joblib
 from skimage.transform import rotate
 from scipy.stats import gmean

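This `from sklearn.externals import joblib` to `import joblib` swap repeats across all the files below; scikit-learn deprecated the vendored joblib in 0.21 and removed it in 0.23, so the standalone package is required. If compatibility with older scikit-learn mattered, a shim like the following sketch could be used instead (the PR itself just rewrites the imports, which is cleaner when the environment is pinned anyway):

```python
# Sketch: tolerate both old and new scikit-learn layouts.
try:
    import joblib  # standalone package, required for scikit-learn >= 0.23
except ImportError:
    from sklearn.externals import joblib  # vendored copy, removed in 0.23
```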
4 changes: 2 additions & 2 deletions src/models.py
@@ -6,7 +6,7 @@
 from torch import optim
 import pandas as pd
 from sklearn.model_selection import train_test_split
-from sklearn.externals import joblib
+import joblib
 from sklearn.ensemble import RandomForestRegressor

from .callbacks import NeptuneMonitorSegmentation, ValidationMonitorSegmentation
@@ -459,4 +459,4 @@ def _convert_features_to_df(features):
     for image_features in features:
         for layer_features in image_features[1:]:
             df_features.append(layer_features)
-    return pd.concat(df_features)
+    return pd.concat(df_features)
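For orientation, `_convert_features_to_df` flattens per-image lists of per-layer DataFrames, skipping each list's first element, into a single table. A self-contained illustration (the 'meta' entries are hypothetical stand-ins for whatever occupies index 0):

```python
import pandas as pd

# Hypothetical per-image feature lists: element 0 is skipped by the
# image_features[1:] slice; the remaining per-layer frames are stacked.
features = [
    ["meta-0", pd.DataFrame({"area": [10.0]}), pd.DataFrame({"area": [12.5]})],
    ["meta-1", pd.DataFrame({"area": [7.0]})],
]

df_features = [layer for image in features for layer in image[1:]]
print(pd.concat(df_features))
```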
2 changes: 1 addition & 1 deletion src/preparation.py
@@ -8,7 +8,7 @@
 from pycocotools.coco import COCO
 from skimage.morphology import binary_erosion, rectangle, binary_dilation
 from scipy.ndimage.morphology import distance_transform_edt
-from sklearn.externals import joblib
+import joblib

from .utils import get_logger, add_dropped_objects, label

2 changes: 1 addition & 1 deletion src/steps/base.py
@@ -4,7 +4,7 @@

 import numpy as np
 from scipy import sparse
-from sklearn.externals import joblib
+import joblib

from .utils import view_graph, plot_graph, get_logger, initialize_logger

2 changes: 1 addition & 1 deletion src/steps/keras/embeddings.py
@@ -1,6 +1,6 @@
 import numpy as np
 from gensim.models import KeyedVectors
-from sklearn.externals import joblib
+import joblib

from ..base import BaseTransformer

2 changes: 1 addition & 1 deletion src/steps/keras/loaders.py
@@ -1,5 +1,5 @@
 from keras.preprocessing import text, sequence
-from sklearn.externals import joblib
+import joblib

from ..base import BaseTransformer

2 changes: 1 addition & 1 deletion src/steps/misc.py
@@ -1,6 +1,6 @@
 import lightgbm as lgb
 from attrdict import AttrDict
-from sklearn.externals import joblib
+import joblib

from .base import BaseTransformer
from .utils import get_logger
2 changes: 1 addition & 1 deletion src/steps/postprocessing.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from sklearn.externals import joblib
+import joblib

from .base import BaseTransformer

4 changes: 2 additions & 2 deletions src/steps/preprocessing/misc.py
@@ -1,4 +1,4 @@
-from sklearn.externals import joblib
+import joblib

from ..base import BaseTransformer

@@ -28,4 +28,4 @@ def save(self, filepath):
         params = {'x_columns': self.x_columns,
                   'y_columns': self.y_columns
                   }
-        joblib.dump(params, filepath)
+        joblib.dump(params, filepath)
2 changes: 1 addition & 1 deletion src/steps/preprocessing/text.py
@@ -8,7 +8,7 @@
 from nltk.corpus import stopwords
 from nltk.stem.wordnet import WordNetLemmatizer
 from nltk.tokenize import TweetTokenizer
-from sklearn.externals import joblib
+import joblib

from ..base import BaseTransformer

2 changes: 1 addition & 1 deletion src/steps/pytorch/loaders.py
@@ -4,7 +4,7 @@
 import torch
 import torchvision.transforms as transforms
 from PIL import Image
-from sklearn.externals import joblib
+import joblib
 from torch.utils.data import Dataset, DataLoader

from ..base import BaseTransformer
2 changes: 1 addition & 1 deletion src/steps/sklearn/models.py
@@ -5,7 +5,7 @@
 #from catboost import CatBoostClassifier
 from sklearn import ensemble
 from sklearn import svm
-from sklearn.externals import joblib
+import joblib
 from xgboost import XGBClassifier

from ..base import BaseTransformer
2 changes: 1 addition & 1 deletion src/utils.py
@@ -129,7 +129,7 @@ def bounding_box_from_rle(rle):

 def read_config(config_path):
     with open(config_path) as f:
-        config = yaml.load(f)
+        config = yaml.safe_load(f)
     return AttrDict(config)


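The `yaml.safe_load` change is more than cosmetic: PyYAML 5.1+ warns when `yaml.load` is called without an explicit `Loader`, and the full loader can instantiate arbitrary Python objects from tagged input. `safe_load` restricts parsing to plain data types. A minimal sketch:

```python
import yaml

doc = "name: mapping\nconfig_path: neptune.yaml"

# safe_load is equivalent to yaml.load(doc, Loader=yaml.SafeLoader): it
# parses plain scalars, lists and mappings but refuses python/object tags.
data = yaml.safe_load(doc)
assert data == {"name": "mapping", "config_path": "neptune.yaml"}
```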