Upload New File

a8da0837 · cordina · 9c26871c · a8da0837
Commit a8da0837 authored 3 years ago by cordina
--- a/3d-cnn-model-with-axis-normalization.ipynb
+++ b/3d-cnn-model-with-axis-normalization.ipynb
+{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install ../input/nifti-converter/dicom2nifti-2.3.0/dicom2nifti-2.3.0\n#file:///srv/pkg/mypackage\n#!pip install dicom2nifti","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:34:59.138527Z","iopub.execute_input":"2022-01-20T19:34:59.139622Z","iopub.status.idle":"2022-01-20T19:35:32.086141Z","shell.execute_reply.started":"2022-01-20T19:34:59.139578Z","shell.execute_reply":"2022-01-20T19:35:32.085130Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"import os\nimport glob\n\nimport pandas as pd\nimport numpy as np\nfrom pathlib import Path\nimport random\nfrom tqdm.notebook import tqdm\nimport matplotlib.pyplot as plt\n\nimport pydicom # Handle MRI images\nimport dicom2nifti\nfrom dicom2nifti.exceptions import ConversionValidationError\n\nimport cv2  # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import roc_auc_score\n\nfrom scipy import ndimage\n\nimport tensorflow as tf\nfrom tensorflow import keras\nfrom tensorflow.keras.utils import to_categorical\nfrom tensorflow.keras import layers","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:32.088789Z","iopub.execute_input":"2022-01-20T19:35:32.089124Z","iopub.status.idle":"2022-01-20T19:35:39.276256Z","shell.execute_reply.started":"2022-01-20T19:35:32.089083Z","shell.execute_reply":"2022-01-20T19:35:39.275316Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"data_dir = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification/')\n\nmri_types = 
[\"FLAIR\", \"T1w\", \"T2w\", \"T1wCE\"]\nexcluded_images = [109, 123, 709] # Bad images","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.277691Z","iopub.execute_input":"2022-01-20T19:35:39.277966Z","iopub.status.idle":"2022-01-20T19:35:39.288391Z","shell.execute_reply.started":"2022-01-20T19:35:39.277927Z","shell.execute_reply":"2022-01-20T19:35:39.285921Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"train_df = pd.read_csv(data_dir / \"train_labels.csv\")\ntest_df = pd.read_csv(data_dir / \"sample_submission.csv\")\nsample_submission = pd.read_csv(data_dir / \"sample_submission.csv\")\n\ntrain_df = train_df[~train_df.BraTS21ID.isin(excluded_images)]\n\nprint(f\"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}\")\nprint(f\"test data : Rows={test_df.shape[0]}, Columns={test_df.shape[1]}\")","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.291747Z","iopub.execute_input":"2022-01-20T19:35:39.292043Z","iopub.status.idle":"2022-01-20T19:35:39.393037Z","shell.execute_reply.started":"2022-01-20T19:35:39.292003Z","shell.execute_reply":"2022-01-20T19:35:39.391837Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"def resize_volume(img):\n    \"\"\"Resize across z-axis\"\"\"\n    # Set the desired depth\n    desired_depth = 64\n    desired_width = 128\n    desired_height = 128\n    # Get current depth\n    current_depth = img.shape[-1]\n    current_width = img.shape[0]\n    current_height = img.shape[1]\n    # Compute depth factor\n    depth = current_depth / desired_depth\n    width = current_width / desired_width\n    height = current_height / desired_height\n    depth_factor = 1 / depth\n    width_factor = 1 / width\n    height_factor = 1 / height\n    # Resize across z-axis\n    img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)\n    return 
img","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.396612Z","iopub.execute_input":"2022-01-20T19:35:39.396965Z","iopub.status.idle":"2022-01-20T19:35:39.404999Z","shell.execute_reply.started":"2022-01-20T19:35:39.396923Z","shell.execute_reply":"2022-01-20T19:35:39.403693Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"def load_dicom(path, size = 224):\n    dicom = pydicom.read_file(path)\n    data = dicom.pixel_array\n    if np.max(data) != 0:\n        data = data / np.max(data)\n    data = (data * 255).astype(np.uint8)\n    return cv2.resize(data, (size, size))","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.406871Z","iopub.execute_input":"2022-01-20T19:35:39.407269Z","iopub.status.idle":"2022-01-20T19:35:39.418867Z","shell.execute_reply.started":"2022-01-20T19:35:39.407225Z","shell.execute_reply":"2022-01-20T19:35:39.417657Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"def load_dicom2(path):\n    data = np.concatenate([tf.expand_dims(pydicom.read_file(p).pixel_array, axis=-1) for p in path], axis=2)\n    if np.max(data) != 0:\n        data = data / np.max(data)\n    data = (data * 255).astype(np.uint8)\n    return resize_volume(data)","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.420963Z","iopub.execute_input":"2022-01-20T19:35:39.421411Z","iopub.status.idle":"2022-01-20T19:35:39.430971Z","shell.execute_reply.started":"2022-01-20T19:35:39.421311Z","shell.execute_reply":"2022-01-20T19:35:39.429720Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"def get_all_image_paths(brats21id, image_type, folder='train'):\n    assert(image_type in mri_types)\n    \n    patient_path = os.path.join(\n        \"../input/rsna-miccai-brain-tumor-radiogenomic-classification\",\n        folder, \n        str(brats21id).zfill(5),\n    )\n\n    paths = sorted(\n        glob.glob(os.path.join(patient_path, image_type, 
\"*\")), \n        key=lambda x: int(x[:-4].split(\"-\")[-1]),\n    )\n    \n    num_images = len(paths)\n    \n    start = int(num_images * 0.25)\n    end = int(num_images * 0.75)\n\n    interval = 3\n    \n    if num_images < 10: \n        interval = 1\n    \n    return np.array(paths[start:end:interval])\n\ndef get_all_images2(brats21id, image_type, folder='train'):\n    return [load_dicom2(get_all_image_paths(brats21id, image_type, folder))]","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.432880Z","iopub.execute_input":"2022-01-20T19:35:39.433278Z","iopub.status.idle":"2022-01-20T19:35:39.445397Z","shell.execute_reply.started":"2022-01-20T19:35:39.433192Z","shell.execute_reply":"2022-01-20T19:35:39.443995Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"def first_last_true(arr):\n    first = 0\n    for i,ele in enumerate(arr):\n        if ele:\n            first = i\n            break\n    last = len(arr)\n    while i > 1:\n        if arr[last-1]:\n            break\n        last -= 1\n    return first, last\n\ndef remove_zeros(data):\n    axis0 = data.any(axis=(1,2))\n    axis0s, axis0e = first_last_true(axis0)\n    axis1 = data.any(axis=(0,2))\n    axis1s, axis1e = first_last_true(axis1)\n    axis2 = data.any(axis=(0,1))\n    axis2s, axis2e = first_last_true(axis2)\n    return data[axis0s:axis0e, axis1s:axis1e, axis2s:axis2e]\n\ndef load_nii_from_dicom_series(path):\n    data = dicom2nifti.dicom_series_to_nifti(path, output_file=\"test.nii\")\n    data = remove_zeros(data[\"NII\"].get_fdata())\n    data -= np.min(data)\n    max_val = np.max(data)\n    if max_val > 0:\n        data /= max_val\n    \n    num_images = data.shape[0]\n    \n    start = int(num_images * 0.25)\n    end = int(num_images * 0.75)\n\n    interval = 3\n    \n    if num_images < 10: \n        interval = 1\n        \n    return resize_volume((data[start:end:interval] * 255).astype(np.uint8))\n\ndef get_all_images3(brats21id, image_type, 
folder):\n    assert(image_type in mri_types)\n    \n    patient_path = os.path.join(\n        \"../input/rsna-miccai-brain-tumor-radiogenomic-classification\",\n        folder, str(brats21id).zfill(5), image_type\n    )\n    return [load_nii_from_dicom_series(patient_path)]","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.447366Z","iopub.execute_input":"2022-01-20T19:35:39.448014Z","iopub.status.idle":"2022-01-20T19:35:39.464580Z","shell.execute_reply.started":"2022-01-20T19:35:39.447965Z","shell.execute_reply":"2022-01-20T19:35:39.463375Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"def get_all_data_for_train(image_type):\n    global train_df\n    \n    X = []\n    y = []\n    train_ids = []\n\n    for i in tqdm(train_df.index):\n        x = train_df.loc[i]\n        try:\n            images = get_all_images3(int(x['BraTS21ID']), image_type, 'train')\n        except ConversionValidationError:\n            print(\"SLICE_INCREMENT_INCONSISTENT for\",x['BraTS21ID'])\n            images = get_all_images2(int(x['BraTS21ID']), image_type, 'train')\n        label = x['MGMT_value']\n\n        X += images\n        y += [label] * len(images)\n        train_ids += [int(x['BraTS21ID'])] * len(images)\n        assert(len(X) == len(y))\n    return np.array(X), np.array(y), np.array(train_ids)","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.467350Z","iopub.execute_input":"2022-01-20T19:35:39.467651Z","iopub.status.idle":"2022-01-20T19:35:39.481093Z","shell.execute_reply.started":"2022-01-20T19:35:39.467602Z","shell.execute_reply":"2022-01-20T19:35:39.479847Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"def get_all_data_for_test(image_type):\n    global test_df\n    \n    X = []\n    test_ids = []\n\n    for i in tqdm(test_df.index):\n        x = test_df.loc[i]\n        try:\n            images = get_all_images3(int(x['BraTS21ID']), image_type, 'test')\n        except 
ConversionValidationError:\n            print(\"SLICE_INCREMENT_INCONSISTENT for\",x['BraTS21ID'])\n            images = get_all_images2(int(x['BraTS21ID']), image_type, 'test')\n        X += images\n        test_ids += [int(x['BraTS21ID'])] * len(images)\n\n    return np.array(X), np.array(test_ids)","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.484633Z","iopub.execute_input":"2022-01-20T19:35:39.485397Z","iopub.status.idle":"2022-01-20T19:35:39.495440Z","shell.execute_reply.started":"2022-01-20T19:35:39.485337Z","shell.execute_reply":"2022-01-20T19:35:39.494327Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"X, y, trainidt = get_all_data_for_train('T2w')\nX_test, testidt = get_all_data_for_test('T2w')","metadata":{"execution":{"iopub.status.busy":"2022-01-20T19:35:39.497210Z","iopub.execute_input":"2022-01-20T19:35:39.497681Z","iopub.status.idle":"2022-01-20T20:35:38.055442Z","shell.execute_reply.started":"2022-01-20T19:35:39.497634Z","shell.execute_reply":"2022-01-20T20:35:38.054424Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"# source: https://keras.io/examples/vision/3D_image_classification/\ndef get_3DCNNmodel(width=128, height=128, depth=64, name='3dcnn'):\n    \"\"\"Build a 3D convolutional neural network model.\"\"\"\n\n    inputs = tf.keras.Input((width, height, depth, 1))\n\n    x = tf.keras.layers.Conv3D(filters=64, kernel_size=3, activation=\"relu\")(inputs)\n    x = tf.keras.layers.MaxPool3D(pool_size=2)(x)\n    x = tf.keras.layers.BatchNormalization()(x)\n\n    x = tf.keras.layers.Conv3D(filters=64, kernel_size=3, activation=\"relu\")(x)\n    x = tf.keras.layers.MaxPool3D(pool_size=2)(x)\n    x = tf.keras.layers.BatchNormalization()(x)\n\n    x = tf.keras.layers.Conv3D(filters=128, kernel_size=3, activation=\"relu\")(x)\n    x = tf.keras.layers.MaxPool3D(pool_size=2)(x)\n    x = tf.keras.layers.BatchNormalization()(x)\n\n    x = 
tf.keras.layers.Conv3D(filters=256, kernel_size=3, activation=\"relu\")(x)\n    x = tf.keras.layers.MaxPool3D(pool_size=2)(x)\n    x = tf.keras.layers.BatchNormalization()(x)\n\n    x = tf.keras.layers.GlobalAveragePooling3D()(x)\n    x = tf.keras.layers.Dense(units=512, activation=\"relu\")(x)\n    x = tf.keras.layers.Dropout(0.3)(x)\n\n    #outputs = tf.keras.layers.Dense(units=1, activation=\"sigmoid\")(x)\n    output = keras.layers.Dense(2, activation=\"softmax\")(x)\n\n    #model = tf.keras.Model(inputs, outputs, name=name)\n\n    #initial_learning_rate = 0.0001\n    #lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(\n    #    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True\n    #)\n    #model.compile(\n    #    loss=\"binary_crossentropy\",\n    #    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),\n    #    metrics=[\"acc\"],\n    #)\n    model = keras.Model(inputs, output)\n    initial_learning_rate =  0.0001\n    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(\n        initial_learning_rate,\n        decay_steps=100000,\n        decay_rate=0.96, \n        staircase=True\n    )\n  \n    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')\n\n    model.compile(\n        loss=\"categorical_crossentropy\", \n        optimizer=keras.optimizers.Adam(),\n        metrics=[roc_auc],\n    )\n    return model","metadata":{"execution":{"iopub.status.busy":"2022-01-20T20:35:38.057183Z","iopub.execute_input":"2022-01-20T20:35:38.057508Z","iopub.status.idle":"2022-01-20T20:35:38.074602Z","shell.execute_reply.started":"2022-01-20T20:35:38.057463Z","shell.execute_reply":"2022-01-20T20:35:38.073501Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor=\"val_roc_auc\", mode='max', patience=10) 
#patience=10","metadata":{"execution":{"iopub.status.busy":"2022-01-20T20:35:38.079304Z","iopub.execute_input":"2022-01-20T20:35:38.079568Z","iopub.status.idle":"2022-01-20T20:35:38.105956Z","shell.execute_reply.started":"2022-01-20T20:35:38.079535Z","shell.execute_reply":"2022-01-20T20:35:38.104929Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"auc_list = []\nbest_auc = float(\"-inf\")\n\nfor i in tqdm(range(20)):\n    checkpoint_filepath = \"best_model_\"+str(i)+\".h5\"\n\n    model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(\n        filepath=checkpoint_filepath,\n        save_weights_only=False,\n        monitor=\"val_roc_auc\",\n        mode=\"max\",\n        save_best_only=True,\n        save_freq=\"epoch\",\n        verbose=0,\n    )\n    X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid = train_test_split(X, y, trainidt, test_size=0.2, random_state=i)\n\n    X_train = tf.expand_dims(X_train, axis=-1)\n    X_valid = tf.expand_dims(X_valid, axis=-1)\n    y_train = to_categorical(y_train)\n    y_valid = to_categorical(y_valid)\n\n    model = get_3DCNNmodel()\n\n    history = model.fit(x=X_train, y = y_train, epochs=40, batch_size = 2,\n                        callbacks=[model_checkpoint_cb, early_stopping_cb],\n                        validation_data=(X_valid, y_valid), verbose=0)\n\n    model_best = tf.keras.models.load_model(filepath=checkpoint_filepath)\n    y_pred = model_best.predict(X_valid,batch_size = 2)\n\n    pred = np.argmax(y_pred, axis=1)\n\n    result = pd.DataFrame(trainidt_valid)\n    result[1] = pred\n\n    result.columns = [\"BraTS21ID\", \"MGMT_value\"]\n    result2 = result.groupby(\"BraTS21ID\", as_index=False).mean()\n\n    result2 = result2.merge(train_df, on=\"BraTS21ID\")\n    auc = roc_auc_score(\n        result2.MGMT_value_y,\n        result2.MGMT_value_x,\n    )\n    print(f\"Validation AUC={auc}\")\n    auc_list.append(auc)\n    if auc > best_auc:\n        best_i = i\n   
     best_auc = auc","metadata":{"execution":{"iopub.status.busy":"2022-01-20T20:35:38.108170Z","iopub.execute_input":"2022-01-20T20:35:38.108658Z","iopub.status.idle":"2022-01-20T23:09:57.598808Z","shell.execute_reply.started":"2022-01-20T20:35:38.108611Z","shell.execute_reply":"2022-01-20T23:09:57.597566Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"plt.hist(auc_list)\nplt.xlabel(\"AUC\")\nplt.ylabel(\"No. of trials\")\nplt.title(f\"Mean AUC = {np.mean(auc_list)}\")\nplt.show()","metadata":{"execution":{"iopub.status.busy":"2022-01-20T23:09:57.600492Z","iopub.execute_input":"2022-01-20T23:09:57.604151Z","iopub.status.idle":"2022-01-20T23:09:57.941487Z","shell.execute_reply.started":"2022-01-20T23:09:57.604041Z","shell.execute_reply":"2022-01-20T23:09:57.940508Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"checkpoint_filepath = \"best_model_\"+str(best_i)+\".h5\"\nprint(f\"Using {checkpoint_filepath} with AUC = {best_auc}.\")\nmodel_best = tf.keras.models.load_model(filepath=checkpoint_filepath)\ny_pred = model_best.predict(X_test,batch_size = 2)\n\npred = np.argmax(y_pred, axis=1) #\n\nresult = pd.DataFrame(testidt)\nresult[1] = pred\npred","metadata":{"execution":{"iopub.status.busy":"2022-01-20T23:09:57.942750Z","iopub.execute_input":"2022-01-20T23:09:57.943024Z","iopub.status.idle":"2022-01-20T23:09:59.684831Z","shell.execute_reply.started":"2022-01-20T23:09:57.942982Z","shell.execute_reply":"2022-01-20T23:09:59.683948Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"result.columns=['BraTS21ID','MGMT_value']\n\nresult2 = result.groupby('BraTS21ID',as_index=False).mean()\nresult2['BraTS21ID'] = sample_submission['BraTS21ID']\n\nresult2['MGMT_value'] = result2['MGMT_value'].apply(lambda 
x:round(x*10)/10)\n\nresult2.to_csv('submission.csv',index=False)\nresult2","metadata":{"execution":{"iopub.status.busy":"2022-01-20T23:09:59.689584Z","iopub.execute_input":"2022-01-20T23:09:59.692596Z","iopub.status.idle":"2022-01-20T23:09:59.744853Z","shell.execute_reply.started":"2022-01-20T23:09:59.692531Z","shell.execute_reply":"2022-01-20T23:09:59.743973Z"},"trusted":true},"execution_count":19,"outputs":[]}]}
\ No newline at end of file
+%% Cell type:code id: tags:
+``` python
+# Install dicom2nifti 2.3.0 from the copy stored under ../input (a local path install).
+!pip install ../input/nifti-converter/dicom2nifti-2.3.0/dicom2nifti-2.3.0
+# Alternative install by package name (left disabled):
+#!pip install dicom2nifti
+```
+%% Cell type:code id: tags:
+``` python
+import os
+import glob
+import pandas as pd
+import numpy as np
+from pathlib import Path
+import random
+from tqdm.notebook import tqdm
+import matplotlib.pyplot as plt
+import pydicom # Handle MRI images
+import dicom2nifti
+from dicom2nifti.exceptions import ConversionValidationError
+import cv2  # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import roc_auc_score
+from scipy import ndimage
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras import layers
+```
+%% Cell type:code id: tags:
+``` python
+# Root folder of the competition data; the label and submission CSVs are read relative to it.
+data_dir = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification/')
+# MRI sequence names accepted by the image-loading helpers (each asserts membership).
+mri_types = ["FLAIR", "T1w", "T2w", "T1wCE"]
+# BraTS21ID values removed from the training labels before any image is loaded.
+excluded_images = [109, 123, 709] # Bad images
+```
+%% Cell type:code id: tags:
+``` python
+# Training labels; the BraTS21ID and MGMT_value columns are used further down.
+train_df = pd.read_csv(data_dir / "train_labels.csv")
+# The sample submission doubles as the list of test patients. Parse the file once
+# and hand test_df an independent copy instead of reading the same CSV twice.
+sample_submission = pd.read_csv(data_dir / "sample_submission.csv")
+test_df = sample_submission.copy()
+# Drop the patients listed in excluded_images.
+train_df = train_df[~train_df.BraTS21ID.isin(excluded_images)]
+print(f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}")
+print(f"test data : Rows={test_df.shape[0]}, Columns={test_df.shape[1]}")
+```
+%% Cell type:code id: tags:
+``` python
+def resize_volume(img, desired_width=128, desired_height=128, desired_depth=64):
+    """Resample a 3-D volume to a fixed shape using linear interpolation.
+    All three axes are rescaled, not only the depth axis.
+    Args:
+        img: 3-D array; axis 0 is treated as width, axis 1 as height and the
+            last axis as depth.
+        desired_width: output size along axis 0 (default 128).
+        desired_height: output size along axis 1 (default 128).
+        desired_depth: output size along the last axis (default 64).
+    Returns:
+        The array returned by ``ndimage.zoom`` with shape
+        ``(desired_width, desired_height, desired_depth)``.
+    Raises:
+        ZeroDivisionError: if any of the three input axes has length 0.
+    """
+    current_width = img.shape[0]
+    current_height = img.shape[1]
+    current_depth = img.shape[-1]
+    # A zoom factor is target size / current size for each axis.
+    zoom_factors = (
+        desired_width / current_width,
+        desired_height / current_height,
+        desired_depth / current_depth,
+    )
+    # order=1 selects linear spline interpolation.
+    return ndimage.zoom(img, zoom_factors, order=1)
+```
+%% Cell type:code id: tags:
+``` python
+def load_dicom(path, size = 224):
+    """Read one DICOM file and return it as a ``size`` x ``size`` uint8 image.
+    The pixel data is divided by its maximum (skipped when the maximum is 0),
+    multiplied by 255, cast to uint8 and resized with ``cv2.resize``.
+    """
+    pixels = pydicom.read_file(path).pixel_array
+    peak = np.max(pixels)
+    # An all-zero image is left as is so we never divide by zero.
+    if peak != 0:
+        pixels = pixels / peak
+    scaled = (pixels * 255).astype(np.uint8)
+    return cv2.resize(scaled, (size, size))
+```
+%% Cell type:code id: tags:
+``` python
+def load_dicom2(path):
+    """Load several DICOM slices into one resized uint8 volume.
+    Args:
+        path: iterable of DICOM file paths, one per slice, in stacking order.
+    Returns:
+        The output of ``resize_volume`` applied to the slices stacked along a
+        new last axis, after scaling by the volume maximum to the 0-255 range.
+    Raises:
+        ValueError: if ``path`` is empty (there is nothing to stack).
+    """
+    # Stack directly in NumPy; wrapping every slice in a TensorFlow tensor only
+    # to concatenate it back into a NumPy array is unnecessary work.
+    data = np.stack([pydicom.read_file(p).pixel_array for p in path], axis=-1)
+    peak = np.max(data)
+    # An all-zero volume is left as is so we never divide by zero.
+    if peak != 0:
+        data = data / peak
+    data = (data * 255).astype(np.uint8)
+    return resize_volume(data)
+```
+%% Cell type:code id: tags:
+``` python
+def get_all_image_paths(brats21id, image_type, folder='train'):
+    """Return a sub-sampled, ordered array of slice file paths for one series.
+    The files of the patient's ``image_type`` folder are sorted by the integer
+    that follows the last "-" in the path (with the final four characters
+    removed). Only the middle half of that list is kept, taking every third
+    file, or every file when the series has fewer than 10 files.
+    """
+    assert(image_type in mri_types)
+    series_dir = os.path.join(
+        "../input/rsna-miccai-brain-tumor-radiogenomic-classification",
+        folder,
+        str(brats21id).zfill(5),
+        image_type,
+    )
+    def slice_number(filename):
+        # Strip the last four characters, then read the number after the last dash.
+        return int(filename[:-4].split("-")[-1])
+    ordered = sorted(glob.glob(os.path.join(series_dir, "*")), key=slice_number)
+    total = len(ordered)
+    step = 1 if total < 10 else 3
+    first = int(total * 0.25)
+    stop = int(total * 0.75)
+    return np.array(ordered[first:stop:step])
+def get_all_images2(brats21id, image_type, folder='train'):
+    """Load one series through ``load_dicom2`` and return it as a one-element list."""
+    slice_paths = get_all_image_paths(brats21id, image_type, folder)
+    volume = load_dicom2(slice_paths)
+    return [volume]
+```
+%% Cell type:code id: tags:
+``` python
+def first_last_true(arr):
+    """Return the half-open index range ``(first, last)`` spanning the truthy entries.
+    ``first`` is the index of the first truthy element and ``last`` is one past
+    the index of the final truthy element, so ``arr[first:last]`` drops the
+    leading and trailing falsy entries.
+    Args:
+        arr: 1-D sequence or array of truthy/falsy values.
+    Returns:
+        Tuple ``(first, last)`` of Python ints. When ``arr`` has no truthy
+        element there is nothing to trim and ``(0, len(arr))`` is returned.
+    """
+    hits = np.flatnonzero(np.asarray(arr))
+    if hits.size == 0:
+        # Nothing truthy (or empty input): keep the whole range instead of failing.
+        return 0, len(arr)
+    # The trailing edge is derived from the last hit itself, so it is trimmed
+    # even when the first hit sits at index 0 or 1.
+    return int(hits[0]), int(hits[-1]) + 1
+def remove_zeros(data):
+    """Crop a 3-D array to the index ranges reported by ``first_last_true``.
+    For each axis, the array is reduced with ``any`` over the two other axes
+    and the resulting 1-D mask is passed to ``first_last_true`` to obtain the
+    start and stop of the crop along that axis.
+    """
+    crop = []
+    for axis in range(3):
+        other_axes = tuple(a for a in range(3) if a != axis)
+        start, stop = first_last_true(data.any(axis=other_axes))
+        crop.append(slice(start, stop))
+    return data[tuple(crop)]
+def load_nii_from_dicom_series(path):
+    """Convert a DICOM series folder with dicom2nifti and return a resized uint8 volume.
+    The conversion writes its output to "test.nii". The converted data is
+    cropped with ``remove_zeros``, shifted so its minimum is 0, divided by its
+    maximum (when positive), sub-sampled along axis 0, scaled to 0-255 and
+    passed to ``resize_volume``.
+    """
+    converted = dicom2nifti.dicom_series_to_nifti(path, output_file="test.nii")
+    volume = remove_zeros(converted["NII"].get_fdata())
+    # Min-max normalisation, done in place on the cropped array.
+    volume -= np.min(volume)
+    peak = np.max(volume)
+    if peak > 0:
+        volume /= peak
+    extent = volume.shape[0]
+    # Keep the middle half of axis 0: every third index, or every index when short.
+    step = 1 if extent < 10 else 3
+    # NOTE(review): the sub-sampling runs along axis 0 while resize_volume treats
+    # the last axis as depth -- confirm which axis holds the slices.
+    middle = volume[int(extent * 0.25):int(extent * 0.75):step]
+    return resize_volume((middle * 255).astype(np.uint8))
+def get_all_images3(brats21id, image_type, folder):
+    """Load one series through ``load_nii_from_dicom_series`` as a one-element list."""
+    assert(image_type in mri_types)
+    series_dir = os.path.join(
+        "../input/rsna-miccai-brain-tumor-radiogenomic-classification",
+        folder,
+        str(brats21id).zfill(5),
+        image_type,
+    )
+    volume = load_nii_from_dicom_series(series_dir)
+    return [volume]
+```
+%% Cell type:code id: tags:
+``` python
+def get_all_data_for_train(image_type):
+    """Load the ``image_type`` volume of every row in ``train_df``.
+    Each patient is first loaded with ``get_all_images3``; when that raises
+    ``ConversionValidationError`` a message is printed and ``get_all_images2``
+    is used instead.
+    Returns:
+        Three arrays built from parallel lists: the volumes, their MGMT_value
+        labels and their BraTS21ID values.
+    """
+    volumes, labels, patient_ids = [], [], []
+    for row_label in tqdm(train_df.index):
+        row = train_df.loc[row_label]
+        patient = int(row['BraTS21ID'])
+        try:
+            images = get_all_images3(patient, image_type, 'train')
+        except ConversionValidationError:
+            print("SLICE_INCREMENT_INCONSISTENT for",row['BraTS21ID'])
+            images = get_all_images2(patient, image_type, 'train')
+        # Repeat the label and id once per returned volume to keep the lists aligned.
+        volumes.extend(images)
+        labels.extend([row['MGMT_value']] * len(images))
+        patient_ids.extend([patient] * len(images))
+        assert(len(volumes) == len(labels))
+    return np.array(volumes), np.array(labels), np.array(patient_ids)
+```
+%% Cell type:code id: tags:
+``` python
+def get_all_data_for_test(image_type):
+    """Load the ``image_type`` volume of every row in ``test_df``.
+    Each patient is first loaded with ``get_all_images3``; when that raises
+    ``ConversionValidationError`` a message is printed and ``get_all_images2``
+    is used instead.
+    Returns:
+        Two arrays built from parallel lists: the volumes and their BraTS21ID values.
+    """
+    volumes, patient_ids = [], []
+    for row_label in tqdm(test_df.index):
+        row = test_df.loc[row_label]
+        patient = int(row['BraTS21ID'])
+        try:
+            images = get_all_images3(patient, image_type, 'test')
+        except ConversionValidationError:
+            print("SLICE_INCREMENT_INCONSISTENT for",row['BraTS21ID'])
+            images = get_all_images2(patient, image_type, 'test')
+        # Repeat the id once per returned volume to keep the lists aligned.
+        volumes.extend(images)
+        patient_ids.extend([patient] * len(images))
+    return np.array(volumes), np.array(patient_ids)
+```
+%% Cell type:code id: tags:
+``` python
+# Build the training arrays (volumes, labels, patient ids) from the T2w series.
+X, y, trainidt = get_all_data_for_train('T2w')
+# Build the test arrays (volumes, patient ids) from the same T2w series type.
+X_test, testidt = get_all_data_for_test('T2w')
+```
+%% Cell type:code id: tags:
+``` python
+# source: https://keras.io/examples/vision/3D_image_classification/
+def get_3DCNNmodel(width=128, height=128, depth=64, name='3dcnn', learning_rate=None):
+    """Build and compile a 3D convolutional classifier with a 2-way softmax output.
+    Args:
+        width, height, depth: spatial size of the single-channel input volume.
+        name: name given to the Keras model.
+        learning_rate: optional learning rate (a float or a Keras learning-rate
+            schedule such as ``ExponentialDecay``) passed to Adam. ``None``
+            keeps Adam's default settings.
+    Returns:
+        A model compiled with categorical cross-entropy and a ROC AUC metric
+        named ``roc_auc``.
+    """
+    inputs = tf.keras.Input((width, height, depth, 1))
+    x = inputs
+    # Four Conv3D -> MaxPool3D -> BatchNormalization stages; only the filter count changes.
+    for filters in (64, 64, 128, 256):
+        x = tf.keras.layers.Conv3D(filters=filters, kernel_size=3, activation="relu")(x)
+        x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
+        x = tf.keras.layers.BatchNormalization()(x)
+    x = tf.keras.layers.GlobalAveragePooling3D()(x)
+    x = tf.keras.layers.Dense(units=512, activation="relu")(x)
+    x = tf.keras.layers.Dropout(0.3)(x)
+    outputs = tf.keras.layers.Dense(2, activation="softmax")(x)
+    model = tf.keras.Model(inputs, outputs, name=name)
+    # The metric name matters: the callbacks monitor "val_roc_auc".
+    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')
+    if learning_rate is None:
+        optimizer = tf.keras.optimizers.Adam()
+    else:
+        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
+    model.compile(
+        loss="categorical_crossentropy",
+        optimizer=optimizer,
+        metrics=[roc_auc],
+    )
+    return model
+```
+%% Cell type:code id: tags:
+``` python
+# Stop a fit once val_roc_auc (mode='max') has gone 10 epochs without improving.
+early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor="val_roc_auc", mode='max', patience=10)
+```
+%% Cell type:code id: tags:
+``` python
+# Train on 20 different random train/validation splits, checkpointing the best
+# epoch of each run, and remember which run reached the highest validation AUC.
+auc_list = []
+best_auc = float("-inf")
+best_i = None  # index of the best run; defined up front so later cells never hit a NameError
+for i in tqdm(range(20)):
+    checkpoint_filepath = "best_model_"+str(i)+".h5"
+    # Keep only the epoch with the highest val_roc_auc for this run.
+    model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
+        filepath=checkpoint_filepath,
+        save_weights_only=False,
+        monitor="val_roc_auc",
+        mode="max",
+        save_best_only=True,
+        save_freq="epoch",
+        verbose=0,
+    )
+    # The run index doubles as the split seed, so every run is reproducible.
+    X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid = train_test_split(X, y, trainidt, test_size=0.2, random_state=i)
+    # Add the trailing channel axis expected by the model input.
+    X_train = tf.expand_dims(X_train, axis=-1)
+    X_valid = tf.expand_dims(X_valid, axis=-1)
+    y_train = to_categorical(y_train)
+    y_valid = to_categorical(y_valid)
+    model = get_3DCNNmodel()
+    history = model.fit(x=X_train, y = y_train, epochs=40, batch_size = 2,
+                        callbacks=[model_checkpoint_cb, early_stopping_cb],
+                        validation_data=(X_valid, y_valid), verbose=0)
+    model_best = tf.keras.models.load_model(filepath=checkpoint_filepath)
+    y_pred = model_best.predict(X_valid,batch_size = 2)
+    # Score with the predicted probability of class 1. ROC AUC ranks samples by
+    # a continuous score; argmax would collapse every prediction to 0 or 1.
+    pred = y_pred[:, 1]
+    result = pd.DataFrame(trainidt_valid)
+    result[1] = pred
+    result.columns = ["BraTS21ID", "MGMT_value"]
+    # Average per patient, then join the true labels; after the merge the
+    # prediction column is MGMT_value_x and the label column is MGMT_value_y.
+    result2 = result.groupby("BraTS21ID", as_index=False).mean()
+    result2 = result2.merge(train_df, on="BraTS21ID")
+    auc = roc_auc_score(
+        result2.MGMT_value_y,
+        result2.MGMT_value_x,
+    )
+    print(f"Validation AUC={auc}")
+    auc_list.append(auc)
+    if auc > best_auc:
+        best_i = i
+        best_auc = auc
+```
+%% Cell type:code id: tags:
+``` python
+# Histogram of the validation AUCs collected in auc_list; the title shows their mean.
+plt.hist(auc_list)
+plt.xlabel("AUC")
+plt.ylabel("No. of trials")
+plt.title(f"Mean AUC = {np.mean(auc_list)}")
+plt.show()
+```
+%% Cell type:code id: tags:
+``` python
+# Reload the checkpoint of the run with the best validation AUC and score the test volumes.
+checkpoint_filepath = "best_model_"+str(best_i)+".h5"
+print(f"Using {checkpoint_filepath} with AUC = {best_auc}.")
+model_best = tf.keras.models.load_model(filepath=checkpoint_filepath)
+y_pred = model_best.predict(X_test,batch_size = 2)
+# Keep the predicted probability of class 1 as the score. Argmax would reduce
+# every prediction to 0 or 1 and discard the ranking that AUC measures.
+pred = y_pred[:, 1]
+# Column 0 holds the patient ids, column 1 the scores.
+result = pd.DataFrame(testidt)
+result[1] = pred
+pred
+```
+%% Cell type:code id: tags:
+``` python
+result.columns=['BraTS21ID','MGMT_value']
+# One row per patient: average that patient's predictions, then round to one decimal.
+result2 = result.groupby('BraTS21ID',as_index=False).mean()
+result2['MGMT_value'] = result2['MGMT_value'].apply(lambda x:round(x*10)/10)
+# Align on BraTS21ID instead of overwriting the id column by position, so each
+# value stays with its own patient whatever the row order of sample_submission.
+result2 = sample_submission[['BraTS21ID']].merge(result2, on='BraTS21ID', how='left')
+assert result2['MGMT_value'].notna().all(), "missing prediction for at least one BraTS21ID"
+result2.to_csv('submission.csv',index=False)
+result2
+```