Commit 9269610

ditwrd (Aditya Wardianto) and Aditya Wardianto authored
Hotfix/181 hotfix for supercomp (#182)
* fix: remove agatston zero
* fix: added missing img_arr for extract dcm in ground truth cac calculation
* fix: remove comment for writing tfrecord
* ref: use base net to remove mobilenet backend
* ref: use dice_coef
* ref: speedtest for basic
* fix: remove unused imports
* fix: custom objects
* chore: loss cleanup
* chore: model cleanup

Co-authored-by: Aditya Wardianto <[email protected]>
1 parent ace0bee commit 9269610

File tree

8 files changed: +69, -179 lines changed

notebooks/speed.py

+25-9
@@ -8,7 +8,6 @@
 import pydicom as pdc
 import sklearn.metrics as skm
 import tensorflow as tf
-from keras.utils.layer_utils import count_params
 from tqdm import tqdm

 sys.path.append(pathlib.Path.cwd().parent.as_posix())
@@ -17,8 +16,12 @@
 from src.models.lib.builder import build_unet_pp
 from src.models.lib.config import UNetPPConfig
 from src.models.lib.data_loader import create_dataset, preprocess_img
-from src.models.lib.loss import (dice_coef, dice_coef_nosq, log_cosh_dice_loss,
-                                 log_cosh_dice_loss_nosq)
+from src.models.lib.loss import (
+    dice_coef,
+    dice_coef_nosq,
+    log_cosh_dice_loss,
+    log_cosh_dice_loss_nosq,
+)
 from src.models.lib.utils import loss_dict_gen
 from src.system.pipeline.output import auto_cac, ground_truth_auto_cac

@@ -36,13 +39,13 @@
     selected_model_path,
     custom_objects={
         "log_cosh_dice_loss": loss_func,
-        "dice_coef_nosq": dice_coef_nosq,
+        "dice_coef": dice_coef,
     },
 )

 model_depth = 5
 depth = int(sys.argv[3])
-filter_list = [16, 32, 64, 128, 256]
+# filter_list = [16, 32, 64, 128, 256]


 pruned_model = {}
@@ -53,16 +56,29 @@

 model_config = UNetPPConfig(
     model_name=f"model_d{depth}",
-    upsample_mode="transpose",
-    depth=depth + 1,
     input_dim=[512, 512, 1],
     batch_norm=True,
-    deep_supervision=False,
     model_mode="basic",
+    depth=5,
     n_class={"bin": 1},
-    filter_list=filter_list[: depth + 1],
+    deep_supervision=False,
+    upsample_mode="transpose",
+    filter_list=[32, 64, 128, 256, 512],
 )

+
+# model_config = UNetPPConfig(
+#     model_name=f"model_d{depth}",
+#     upsample_mode="transpose",
+#     depth=depth + 1,
+#     input_dim=[512, 512, 1],
+#     batch_norm=True,
+#     deep_supervision=False,
+#     model_mode="basic",
+#     n_class={"bin": 1},
+#     filter_list=filter_list[: depth + 1],
+# )
+
 model, output_layer_name = build_unet_pp(model_config, custom=True)

 print(f"-- Creating pruned model d{depth}")

src/data/preprocess/lib/image.py

+5-15
@@ -7,20 +7,11 @@
 sys.path.append(pathlib.Path.cwd().as_posix())

 from src.data.preprocess.lib.utils import (  # pylint: disable=wrong-import-position,import-error
-    artery_loc_to_abbr,
-    blacklist_agatston_zero,
-    blacklist_invalid_dicom,
-    blacklist_mislabelled_roi,
-    blacklist_multiple_image_id,
-    blacklist_multiple_image_id_with_roi,
-    blacklist_neg_reverse_index,
-    blacklist_no_image,
-    blacklist_pixel_overlap,
-    convert_abr_to_num,
-    fill_segmentation,
-    string_to_float_tuple,
-    string_to_int_tuple,
-)
+    artery_loc_to_abbr, blacklist_agatston_zero, blacklist_invalid_dicom,
+    blacklist_mislabelled_roi, blacklist_multiple_image_id,
+    blacklist_multiple_image_id_with_roi, blacklist_neg_reverse_index,
+    blacklist_no_image, blacklist_pixel_overlap, convert_abr_to_num,
+    fill_segmentation, string_to_float_tuple, string_to_int_tuple)


 def extract_patient_dicom_path(gated_path: pathlib.Path):
@@ -54,7 +45,6 @@ def extract_patient_dicom_path(gated_path: pathlib.Path):
             or patient_number in blacklist_invalid_dicom()
             or patient_number in blacklist_no_image()
             or patient_number in blacklist_neg_reverse_index()
-            or patient_number in blacklist_agatston_zero()
         ):
             continue


src/data/preprocess/pipeline/tfrecord.py

+30-44
@@ -11,28 +11,16 @@

 sys.path.append(pathlib.Path.cwd().as_posix())

-from src.data.preprocess.lib.tfrecord import (
-    create_example_fn,
-)  # pylint: disable=wrong-import-position,import-error
+from src.data.preprocess.lib.tfrecord import \
+    create_example_fn  # pylint: disable=wrong-import-position,import-error
 from src.data.preprocess.lib.utils import (  # pylint: disable=wrong-import-position,import-error
-    artery_loc_to_abbr,
-    blacklist_agatston_zero,
-    blacklist_invalid_dicom,
-    blacklist_mislabelled_roi,
-    blacklist_multiple_image_id,
-    blacklist_multiple_image_id_with_roi,
-    blacklist_neg_reverse_index,
-    blacklist_no_image,
-    blacklist_pixel_overlap,
-    convert_abr_to_num,
-    fill_segmentation,
-    get_patient_split,
-    get_pos_from_bin_list,
-    get_pos_from_mult_list,
-    split_list,
-    string_to_float_tuple,
-    string_to_int_tuple,
-)
+    artery_loc_to_abbr, blacklist_agatston_zero, blacklist_invalid_dicom,
+    blacklist_mislabelled_roi, blacklist_multiple_image_id,
+    blacklist_multiple_image_id_with_roi, blacklist_neg_reverse_index,
+    blacklist_no_image, blacklist_pixel_overlap, convert_abr_to_num,
+    fill_segmentation, get_patient_split, get_pos_from_bin_list,
+    get_pos_from_mult_list, split_list, string_to_float_tuple,
+    string_to_int_tuple)


 def combine_to_tfrecord(
@@ -188,12 +176,12 @@ def combine_to_tfrecord(
                         + 512 * 512
                         - patient_dict["mult_seg"].shape[0]
                     )
-                    # patient_dict["img"] = indexer[patient_index]["img"][
-                    #     img_index
-                    # ]["img_hu"][:]
-                    #
-                    # example = create_example_fn(patient_dict)
-                    # tf_record_file.write(example.SerializeToString())
+                    patient_dict["img"] = indexer[patient_index]["img"][
+                        img_index
+                    ]["img_hu"][:]
+
+                    example = create_example_fn(patient_dict)
+                    tf_record_file.write(example.SerializeToString())
                 else:
                     log_key = f"{split_mode}-img-non-cac"
                     if split_mode == "train":
@@ -216,29 +204,27 @@ def combine_to_tfrecord(
                             )
                             + 512 * 512
                         )
-                        # patient_dict["img"] = indexer[
-                        #     patient_index
-                        # ]["img"][img_index]["img_hu"][:]
-                        # #
-                        # example = create_example_fn(
-                        #     patient_dict
-                        # )
-                        # tf_record_file.write(
-                        #     example.SerializeToString()
-                        # )
+                        patient_dict["img"] = indexer[patient_index][
+                            "img"
+                        ][img_index]["img_hu"][:]
+                        #
+                        example = create_example_fn(patient_dict)
+                        tf_record_file.write(
+                            example.SerializeToString()
+                        )
                     else:
                         log[log_key] = log.get(log_key, 0) + 1
                         log[log_key + " non_cac_pixel"] = (
                             log.get(log_key + " non_cac_pixel", 0)
                             + 512 * 512
                         )
-                        # patient_dict["img"] = indexer[patient_index][
-                        #     "img"
-                        # ][img_index]["img_hu"][:]
-                        # example = create_example_fn(patient_dict)
-                        # tf_record_file.write(
-                        #     example.SerializeToString()
-                        # )
+                        patient_dict["img"] = indexer[patient_index][
+                            "img"
+                        ][img_index]["img_hu"][:]
+                        example = create_example_fn(patient_dict)
+                        tf_record_file.write(
+                            example.SerializeToString()
+                        )

 # Over sample algorithmm
 # CAC = 2391
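The uncommented blocks above are the actual TFRecord write path: each slice dict is packed into a tf.train.Example and serialized into the record file. A minimal sketch of that pattern, with an illustrative feature schema (create_example_fn's real keys are defined elsewhere in the repo):

    import numpy as np
    import tensorflow as tf


    def create_example_sketch(patient_dict):
        # Illustrative stand-in for create_example_fn: serialize the image
        # array as bytes and keep scalar metadata as int64.
        img = np.asarray(patient_dict["img"], dtype=np.float32)
        feature = {
            "img": tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[img.tobytes()])
            ),
            "idx": tf.train.Feature(
                int64_list=tf.train.Int64List(value=[patient_dict["idx"]])
            ),
        }
        return tf.train.Example(features=tf.train.Features(feature=feature))


    with tf.io.TFRecordWriter("sample.tfrecord") as tf_record_file:
        example = create_example_sketch({"img": np.zeros((512, 512)), "idx": 0})
        tf_record_file.write(example.SerializeToString())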

src/models/lib/base.py

+1-1
@@ -175,7 +175,7 @@ def base_unet_pp(config: UNetPPConfig):
             output_lists[index] for index in non_deep_supervision_output_index[1:]
         ],
     ),
-    output_layer_name[-1]
+    [output_layer_name[-1]]
     if n_head == 1
     else [
         output_layer_name[index] for index in non_deep_supervision_output_index[1:]
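This one-character fix keeps the returned layer names a list in both branches of the conditional expression, so callers can iterate without special-casing the single-head model. A small sketch of the pattern (names and n_head are illustrative):

    output_layer_name = ["out_a", "out_b", "out_c"]
    n_head = 1

    # Before the fix the single-head branch yielded a bare string while the
    # multi-head branch yielded a list; wrapping unifies the return type.
    names = [output_layer_name[-1]] if n_head == 1 else output_layer_name[1:]

    for name in names:  # safe to iterate either way
        print(name)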

src/models/lib/builder.py

+3-1
@@ -2,7 +2,8 @@
 import pathlib
 import sys

-from tensorflow import keras  # pylint: disable=wrong-import-position,import-error
+from tensorflow import \
+    keras  # pylint: disable=wrong-import-position,import-error

 sys.path.append(pathlib.Path.cwd().parent.as_posix())
 from src.models.lib.base import base_unet_pp, unetpp_mobile_backend
@@ -37,6 +38,7 @@ def build_unet_pp(config: UNetPPConfig, custom: bool = False) -> keras.Model:
     else:
         raise ValueError(f"Invalid model mode: {config.model_mode}")

+    # return base_unet_pp(config)
     return unetpp_mobile_backend(config)

     if config.model_mode == "basic":

src/models/lib/loss.py

-105
@@ -4,37 +4,6 @@


 def categorical_focal_loss(alpha=0.25, gamma=2.0):
-    """
-    https://github.com/umbertogriffo/focal-loss-keras
-
-    Softmax version of focal loss.
-    When there is a skew between different categories/labels in your data set, you can try to apply this function as a
-    loss.
-           m
-      FL = ∑ -alpha * (1 - p_o,c)^gamma * y_o,c * log(p_o,c)
-          c=1
-
-    where m = number of classes, c = class and o = observation
-
-    Parameters:
-      alpha -- the same as weighing factor in balanced cross entropy. Alpha is used to specify the weight of different
-      categories/labels, the size of the array needs to be consistent with the number of classes.
-      gamma -- focusing parameter for modulating factor (1-p)
-
-    Default value:
-      gamma -- 2.0 as mentioned in the paper
-      alpha -- 0.25 as mentioned in the paper
-
-    References:
-      Official paper: https://arxiv.org/pdf/1708.02002.pdf
-      https://www.tensorflow.org/api_docs/python/tf/keras/backend/categorical_crossentropy
-
-    Usage:
-      model.compile(loss=[categorical_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
-    """
-
-    # def categorical_focal_loss_fixed(y_true, y_pred):
-
     def focal_loss_fixed(y_true, y_pred):
         pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
         pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
@@ -43,33 +12,6 @@ def focal_loss_fixed(y_true, y_pred):
         ) - K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1.0 - pt_0 + K.epsilon()))

     return focal_loss_fixed
-    # """
-    # :param y_true: A tensor of the same shape as `y_pred`
-    # :param y_pred: A tensor resulting from a softmax
-    # :return: Output tensor.
-    # """
-    # y_true = tf.cast(y_true, tf.float32)
-    # # Define epsilon so that the back-propagation will not result in NaN for 0 divisor case
-    # epsilon = K.epsilon()
-    # # Add the epsilon to prediction value
-    # # y_pred = y_pred + epsilon
-    # # Clip the prediciton value
-    # y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
-    # # Calculate p_t
-    # p_t = tf.where(K.equal(y_true, 1), y_pred, 1 - y_pred)
-    # # Calculate alpha_t
-    # alpha_factor = K.ones_like(y_true) * alpha
-    # alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1 - alpha_factor)
-    # # Calculate cross entropy
-    # cross_entropy = -K.log(p_t)
-    # weight = alpha_t * K.pow((1 - p_t), gamma)
-    # # Calculate focal loss
-    # loss = weight * cross_entropy
-    # # Sum the losses in mini_batch
-    # loss = K.mean(K.sum(loss, axis=-1))
-    # return loss
-
-    # return tf.keras.losses.BinaryFocalCrossentropy(alpha=alpha, gamma=gamma)


 def dice_coef(y_true, y_pred):
@@ -145,50 +87,3 @@ def loss(y_true, y_pred):
         return dice + focal_loss

     return loss
-
-
-def dyn_weighted_bincrossentropy(true, pred):
-    """
-    Calculates weighted binary cross entropy. The weights are determined dynamically
-    by the balance of each category. This weight is calculated for each batch.
-
-    The weights are calculted by determining the number of 'pos' and 'neg' classes
-    in the true labels, then dividing by the number of total predictions.
-
-    For example if there is 1 pos class, and 99 neg class, then the weights are 1/100 and 99/100.
-    These weights can be applied so false negatives are weighted 99/100, while false postives are weighted
-    1/100. This prevents the classifier from labeling everything negative and getting 99% accuracy.
-
-    This can be useful for unbalanced catagories.
-
-    """
-    # get the total number of inputs
-    num_pred = K.sum(K.cast(pred < 0.5, true.dtype)) + K.sum(true)
-
-    # get weight of values in 'pos' category
-    zero_weight = K.sum(true) / num_pred + K.epsilon()
-
-    # get weight of values in 'false' category
-    one_weight = K.sum(K.cast(pred < 0.5, true.dtype)) / num_pred + K.epsilon()
-
-    # calculate the weight vector
-    weights = (1.0 - true) * zero_weight + true * one_weight
-
-    # calculate the binary cross entropy
-    bin_crossentropy = K.binary_crossentropy(true, pred)
-
-    # apply the weights
-    weighted_bin_crossentropy = weights * bin_crossentropy
-
-    return K.mean(weighted_bin_crossentropy)
-
-
-def dice_coef_nosq(y_true, y_pred):
-    smooth = K.epsilon()
-    y_true_f = K.flatten(y_true)
-    y_pred_f = K.flatten(y_pred)
-    intersection = K.sum(y_true_f * y_pred_f)
-    dice = (2.0 * intersection + smooth) / (
-        K.sum(K.square(y_true_f)) + K.sum(K.square(y_pred_f)) + smooth
-    )
-    return dice
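Worth noting as you read the removed dice_coef_nosq: despite its name, its denominator squares both tensors, which is the V-Net-style Dice; squaring is a no-op for hard 0/1 masks but not for soft predictions. A minimal sketch of the two denominator conventions for comparison (the retained dice_coef's body is outside this hunk, so its exact form is assumed):

    from tensorflow.keras import backend as K


    def dice_squared(y_true, y_pred):
        # Squared-denominator soft Dice: 2|X.Y| / (|X|^2 + |Y|^2).
        smooth = K.epsilon()
        t, p = K.flatten(y_true), K.flatten(y_pred)
        return (2.0 * K.sum(t * p) + smooth) / (
            K.sum(K.square(t)) + K.sum(K.square(p)) + smooth
        )


    def dice_plain(y_true, y_pred):
        # Plain-denominator soft Dice; equals dice_squared on binary masks.
        smooth = K.epsilon()
        t, p = K.flatten(y_true), K.flatten(y_pred)
        return (2.0 * K.sum(t * p) + smooth) / (K.sum(t) + K.sum(p) + smooth)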

src/models/train_model.py

+4-3
@@ -20,7 +20,8 @@
 from src.models.lib.loss import (categorical_focal_loss, dice_coef,
                                  dice_coef_nosq, dice_focal, dice_loss,
                                  dice_loss_nosq, dyn_weighted_bincrossentropy,
-                                 log_cosh_dice_focal, log_cosh_dice_loss,log_cosh_dice_loss_nosq)
+                                 log_cosh_dice_focal, log_cosh_dice_loss,
+                                 log_cosh_dice_loss_nosq)
 from src.models.lib.utils import loss_dict_gen, parse_list_string


@@ -71,7 +72,7 @@ def train_model(
         strategy = tf.distribute.MirroredStrategy(devices_name)
         with strategy.scope():
             metrics = [
-                dice_coef_nosq,
+                dice_coef,
             ]
             model, model_layer_name = build_unet_pp(model_config, custom=custom)

@@ -88,7 +89,7 @@ def train_model(
             )
     else:
         metrics = [
-            dice_coef_nosq,
+            dice_coef,
         ]
        model, model_layer_name = build_unet_pp(model_config, custom=custom)
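The metric swap takes effect where the model is compiled under the distribution strategy. A minimal sketch of that pattern, with a toy model and a stand-in metric (both illustrative; the real model comes from build_unet_pp):

    import tensorflow as tf


    def dice_coef(y_true, y_pred):
        # Stand-in for src.models.lib.loss.dice_coef.
        inter = tf.reduce_sum(y_true * y_pred)
        return (2.0 * inter + 1e-7) / (
            tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + 1e-7
        )


    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        # Variables and compiled state must be created inside the scope.
        model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation="sigmoid")])
        model.compile(optimizer="adam", loss="binary_crossentropy",
                      metrics=[dice_coef])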

src/system/pipeline/output.py

+1-1
@@ -142,7 +142,7 @@ def ground_truth_auto_cac(img_dcm_paths, loc_lists, mem_opt=False):
     for index, (img_dcm_path, loc_list) in enumerate(zip(img_dcm_paths, loc_lists)):
         ## Preprocessing
         # Get Image HU and pixel spacing
-        img_hu, pxl_spc = extract_dcm(img_dcm_path)
+        img_hu, pxl_spc, img_arr = extract_dcm(img_dcm_path)

         temp = np.zeros((512, 512))
         temp[tuple(zip(*loc_list))] = 1
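This is the "missing img_arr" fix from the commit message: extract_dcm returns three values, so the old two-value unpacking raised a ValueError. A minimal sketch of what such a helper typically looks like with pydicom (the project's actual implementation is not part of this diff):

    import pydicom as pdc


    def extract_dcm_sketch(img_dcm_path):
        # Read the DICOM, convert raw pixels to Hounsfield units via the
        # rescale tags, and return the raw pixel array alongside.
        ds = pdc.dcmread(img_dcm_path)
        img_arr = ds.pixel_array
        img_hu = img_arr * float(ds.RescaleSlope) + float(ds.RescaleIntercept)
        pxl_spc = tuple(float(v) for v in ds.PixelSpacing)
        return img_hu, pxl_spc, img_arr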
