diff --git a/Makefile b/Makefile index 19bb7af..fd2506a 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,9 @@ # create env -conda env create -f environment.yml +conda env create -f environment.yml # --force # -vvv # create directories -mkdir data -mkdir data/raw data/meta data/experiments +mkdir -p data/raw data/meta data/experiments # set default env variable for NEPTUNE_API_TOKEN and CONFIG_PATH export NEPTUNE_API_TOKEN=eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiJiNzA2YmM4Zi03NmY5LTRjMmUtOTM5ZC00YmEwMzZmOTMyZTQifQ== -export CONFIG_PATH=neptune.yaml \ No newline at end of file +export CONFIG_PATH=neptune.yaml diff --git a/REPRODUCE_RESULTS.md b/REPRODUCE_RESULTS.md index d4ab8f2..604ef8e 100644 --- a/REPRODUCE_RESULTS.md +++ b/REPRODUCE_RESULTS.md @@ -97,7 +97,7 @@ project |-- experiments |-- mapping_challenge_baseline # this is where your experiment files will be dumped |-- checkpoints # neural network checkpoints - |-- transformers # serialized transformers after fitting + |-- transformers # serialized transformers after fitting ('unet' and 'scoring_model' go here!) |-- outputs # outputs of transformers if you specified save_output=True anywhere |-- prediction.json # prediction on valid ``` @@ -135,7 +135,8 @@ python main.py train --pipeline_name unet_weighted **NOTE** -Model weights for the winning solution are available [here](https://ui.neptune.ai/o/neptune-ai/org/Mapping-Challenge/e/MC-1057/artifacts) +Model weights for the winning solution are available [here](https://ui.neptune.ai/o/neptune-ai/org/Mapping-Challenge/e/MC-1057/artifacts). +They must be placed in the `./data/experiments/mapping_challenge_baseline/transformers/` folder. ### Second level model (optional) This will train a lightgbm to be able to get the best threshold. 
diff --git a/environment.yml b/environment.yml index 9fe385b..e531858 100644 --- a/environment.yml +++ b/environment.yml @@ -1,27 +1,26 @@ name: mapping dependencies: - - pip=19.1.1 + - pip=21.0.1 - python=3.6.8 - psutil - matplotlib - scikit-image - lightgbm=2.2.1 + - cython=0.28.2 - pip: - - click==6.7 - tqdm==4.23.0 - pydot_ng==1.0.0 - git+https://github.com/lucasb-eyer/pydensecrf.git - xgboost==0.90 - - neptune-client==0.3.0 - - neptune-contrib==0.9.2 + - neptune-client==0.3.0 #installs click==7.1.2 ! + - neptune-contrib==0.9.2 #joblib-1.0.1->0.13.2, Pillow-8.1.1->5.4.1, matplotlib-3.3.4->3.2.2 - imgaug==0.2.5 - opencv_python==3.4.0.12 - torch==0.3.1 - torchvision==0.2.0 - pretrainedmodels==0.7.0 - - pandas==0.24.2 - - numpy==1.16.4 - - cython==0.28.2 + - pandas==1.1.5 # we cannot install 0.24.2 .. other packages require 1.1.5 + - numpy==1.16.4 # some other packages installed version 1.19.2 but the 'squeeze' method is removed somewhere in later versions. we must have this version. - pycocotools==2.0.0 diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..4195098 --- /dev/null +++ b/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Provided to facilitate running the 'Predict on new data' step in the REPRODUCE file. 
+export NEPTUNE_API_TOKEN=eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiJiNzA2YmM4Zi03NmY5LTRjMmUtOTM5ZC00YmEwMzZmOTMyZTQifQ== +export CONFIG_PATH=neptune.yaml + +python main.py predict-on-dir \ +--pipeline_name unet_tta_scoring_model \ +--chunk_size 1000 \ +--dir_path images/ \ +--prediction_path images/predictions.json diff --git a/src/loaders.py b/src/loaders.py index 7d44fb6..16d4af0 100644 --- a/src/loaders.py +++ b/src/loaders.py @@ -10,7 +10,7 @@ from PIL import Image import pandas as pd from torch.utils.data import Dataset, DataLoader -from sklearn.externals import joblib +import joblib from skimage.transform import rotate from scipy.stats import gmean diff --git a/src/models.py b/src/models.py index e96dc0b..641c464 100644 --- a/src/models.py +++ b/src/models.py @@ -6,7 +6,7 @@ from torch import optim import pandas as pd from sklearn.model_selection import train_test_split -from sklearn.externals import joblib +import joblib from sklearn.ensemble import RandomForestRegressor from .callbacks import NeptuneMonitorSegmentation, ValidationMonitorSegmentation @@ -459,4 +459,4 @@ def _convert_features_to_df(features): for image_features in features: for layer_features in image_features[1:]: df_features.append(layer_features) - return pd.concat(df_features) \ No newline at end of file + return pd.concat(df_features) diff --git a/src/preparation.py b/src/preparation.py index d4c5032..b135528 100644 --- a/src/preparation.py +++ b/src/preparation.py @@ -8,7 +8,7 @@ from pycocotools.coco import COCO from skimage.morphology import binary_erosion, rectangle, binary_dilation from scipy.ndimage.morphology import distance_transform_edt -from sklearn.externals import joblib +import joblib from .utils import get_logger, add_dropped_objects, label diff --git a/src/steps/base.py b/src/steps/base.py index 0698c4f..4933fd8 100644 --- a/src/steps/base.py +++ b/src/steps/base.py @@ -4,7 +4,7 @@ import numpy as np from scipy import sparse -from 
sklearn.externals import joblib +import joblib from .utils import view_graph, plot_graph, get_logger, initialize_logger diff --git a/src/steps/keras/embeddings.py b/src/steps/keras/embeddings.py index 4eaaeea..7702c78 100644 --- a/src/steps/keras/embeddings.py +++ b/src/steps/keras/embeddings.py @@ -1,6 +1,6 @@ import numpy as np from gensim.models import KeyedVectors -from sklearn.externals import joblib +import joblib from ..base import BaseTransformer diff --git a/src/steps/keras/loaders.py b/src/steps/keras/loaders.py index a2e89b3..2953b20 100644 --- a/src/steps/keras/loaders.py +++ b/src/steps/keras/loaders.py @@ -1,5 +1,5 @@ from keras.preprocessing import text, sequence -from sklearn.externals import joblib +import joblib from ..base import BaseTransformer diff --git a/src/steps/misc.py b/src/steps/misc.py index 7dd65e2..2cb0b40 100644 --- a/src/steps/misc.py +++ b/src/steps/misc.py @@ -1,6 +1,6 @@ import lightgbm as lgb from attrdict import AttrDict -from sklearn.externals import joblib +import joblib from .base import BaseTransformer from .utils import get_logger diff --git a/src/steps/postprocessing.py b/src/steps/postprocessing.py index 2ec9529..74ab398 100644 --- a/src/steps/postprocessing.py +++ b/src/steps/postprocessing.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from sklearn.externals import joblib +import joblib from .base import BaseTransformer diff --git a/src/steps/preprocessing/misc.py b/src/steps/preprocessing/misc.py index 8ae554d..f14bae4 100644 --- a/src/steps/preprocessing/misc.py +++ b/src/steps/preprocessing/misc.py @@ -1,4 +1,4 @@ -from sklearn.externals import joblib +import joblib from ..base import BaseTransformer @@ -28,4 +28,4 @@ def save(self, filepath): params = {'x_columns': self.x_columns, 'y_columns': self.y_columns } - joblib.dump(params, filepath) \ No newline at end of file + joblib.dump(params, filepath) diff --git a/src/steps/preprocessing/text.py b/src/steps/preprocessing/text.py index 5887be3..f8d6878 
100644 --- a/src/steps/preprocessing/text.py +++ b/src/steps/preprocessing/text.py @@ -8,7 +8,7 @@ from nltk.corpus import stopwords from nltk.stem.wordnet import WordNetLemmatizer from nltk.tokenize import TweetTokenizer -from sklearn.externals import joblib +import joblib from ..base import BaseTransformer diff --git a/src/steps/pytorch/loaders.py b/src/steps/pytorch/loaders.py index a9ef7ef..c4db499 100644 --- a/src/steps/pytorch/loaders.py +++ b/src/steps/pytorch/loaders.py @@ -4,7 +4,7 @@ import torch import torchvision.transforms as transforms from PIL import Image -from sklearn.externals import joblib +import joblib from torch.utils.data import Dataset, DataLoader from ..base import BaseTransformer diff --git a/src/steps/sklearn/models.py b/src/steps/sklearn/models.py index 61bbf19..c154622 100644 --- a/src/steps/sklearn/models.py +++ b/src/steps/sklearn/models.py @@ -5,7 +5,7 @@ #from catboost import CatBoostClassifier from sklearn import ensemble from sklearn import svm -from sklearn.externals import joblib +import joblib from xgboost import XGBClassifier from ..base import BaseTransformer diff --git a/src/utils.py b/src/utils.py index ddd442b..0f4fe41 100644 --- a/src/utils.py +++ b/src/utils.py @@ -129,7 +129,7 @@ def bounding_box_from_rle(rle): def read_config(config_path): with open(config_path) as f: - config = yaml.load(f) + config = yaml.safe_load(f) return AttrDict(config)