Skip to content
Snippets Groups Projects
Commit a8da0837 authored by cordina's avatar cordina
Browse files

Upload New File

parent 9c26871c
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
!pip install ../input/nifti-converter/dicom2nifti-2.3.0/dicom2nifti-2.3.0
#file:///srv/pkg/mypackage
#!pip install dicom2nifti
```
%% Cell type:code id: tags:
``` python
import os
import glob
import pandas as pd
import numpy as np
from pathlib import Path
import random
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pydicom # Handle MRI images
import dicom2nifti
from dicom2nifti.exceptions import ConversionValidationError
import cv2 # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from scipy import ndimage
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
```
%% Cell type:code id: tags:
``` python
data_dir = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification/')
mri_types = ["FLAIR", "T1w", "T2w", "T1wCE"]
excluded_images = [109, 123, 709] # Bad images
```
%% Cell type:code id: tags:
``` python
train_df = pd.read_csv(data_dir / "train_labels.csv")
test_df = pd.read_csv(data_dir / "sample_submission.csv")
sample_submission = pd.read_csv(data_dir / "sample_submission.csv")
train_df = train_df[~train_df.BraTS21ID.isin(excluded_images)]
print(f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}")
print(f"test data : Rows={test_df.shape[0]}, Columns={test_df.shape[1]}")
```
%% Cell type:code id: tags:
``` python
def resize_volume(img):
"""Resize across z-axis"""
# Set the desired depth
desired_depth = 64
desired_width = 128
desired_height = 128
# Get current depth
current_depth = img.shape[-1]
current_width = img.shape[0]
current_height = img.shape[1]
# Compute depth factor
depth = current_depth / desired_depth
width = current_width / desired_width
height = current_height / desired_height
depth_factor = 1 / depth
width_factor = 1 / width
height_factor = 1 / height
# Resize across z-axis
img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)
return img
```
%% Cell type:code id: tags:
``` python
def load_dicom(path, size = 224):
dicom = pydicom.read_file(path)
data = dicom.pixel_array
if np.max(data) != 0:
data = data / np.max(data)
data = (data * 255).astype(np.uint8)
return cv2.resize(data, (size, size))
```
%% Cell type:code id: tags:
``` python
def load_dicom2(path):
data = np.concatenate([tf.expand_dims(pydicom.read_file(p).pixel_array, axis=-1) for p in path], axis=2)
if np.max(data) != 0:
data = data / np.max(data)
data = (data * 255).astype(np.uint8)
return resize_volume(data)
```
%% Cell type:code id: tags:
``` python
def get_all_image_paths(brats21id, image_type, folder='train'):
assert(image_type in mri_types)
patient_path = os.path.join(
"../input/rsna-miccai-brain-tumor-radiogenomic-classification",
folder,
str(brats21id).zfill(5),
)
paths = sorted(
glob.glob(os.path.join(patient_path, image_type, "*")),
key=lambda x: int(x[:-4].split("-")[-1]),
)
num_images = len(paths)
start = int(num_images * 0.25)
end = int(num_images * 0.75)
interval = 3
if num_images < 10:
interval = 1
return np.array(paths[start:end:interval])
def get_all_images2(brats21id, image_type, folder='train'):
return [load_dicom2(get_all_image_paths(brats21id, image_type, folder))]
```
%% Cell type:code id: tags:
``` python
def first_last_true(arr):
first = 0
for i,ele in enumerate(arr):
if ele:
first = i
break
last = len(arr)
while i > 1:
if arr[last-1]:
break
last -= 1
return first, last
def remove_zeros(data):
axis0 = data.any(axis=(1,2))
axis0s, axis0e = first_last_true(axis0)
axis1 = data.any(axis=(0,2))
axis1s, axis1e = first_last_true(axis1)
axis2 = data.any(axis=(0,1))
axis2s, axis2e = first_last_true(axis2)
return data[axis0s:axis0e, axis1s:axis1e, axis2s:axis2e]
def load_nii_from_dicom_series(path):
data = dicom2nifti.dicom_series_to_nifti(path, output_file="test.nii")
data = remove_zeros(data["NII"].get_fdata())
data -= np.min(data)
max_val = np.max(data)
if max_val > 0:
data /= max_val
num_images = data.shape[0]
start = int(num_images * 0.25)
end = int(num_images * 0.75)
interval = 3
if num_images < 10:
interval = 1
return resize_volume((data[start:end:interval] * 255).astype(np.uint8))
def get_all_images3(brats21id, image_type, folder):
assert(image_type in mri_types)
patient_path = os.path.join(
"../input/rsna-miccai-brain-tumor-radiogenomic-classification",
folder, str(brats21id).zfill(5), image_type
)
return [load_nii_from_dicom_series(patient_path)]
```
%% Cell type:code id: tags:
``` python
def get_all_data_for_train(image_type):
global train_df
X = []
y = []
train_ids = []
for i in tqdm(train_df.index):
x = train_df.loc[i]
try:
images = get_all_images3(int(x['BraTS21ID']), image_type, 'train')
except ConversionValidationError:
print("SLICE_INCREMENT_INCONSISTENT for",x['BraTS21ID'])
images = get_all_images2(int(x['BraTS21ID']), image_type, 'train')
label = x['MGMT_value']
X += images
y += [label] * len(images)
train_ids += [int(x['BraTS21ID'])] * len(images)
assert(len(X) == len(y))
return np.array(X), np.array(y), np.array(train_ids)
```
%% Cell type:code id: tags:
``` python
def get_all_data_for_test(image_type):
global test_df
X = []
test_ids = []
for i in tqdm(test_df.index):
x = test_df.loc[i]
try:
images = get_all_images3(int(x['BraTS21ID']), image_type, 'test')
except ConversionValidationError:
print("SLICE_INCREMENT_INCONSISTENT for",x['BraTS21ID'])
images = get_all_images2(int(x['BraTS21ID']), image_type, 'test')
X += images
test_ids += [int(x['BraTS21ID'])] * len(images)
return np.array(X), np.array(test_ids)
```
%% Cell type:code id: tags:
``` python
X, y, trainidt = get_all_data_for_train('T2w')
X_test, testidt = get_all_data_for_test('T2w')
```
%% Cell type:code id: tags:
``` python
# source: https://keras.io/examples/vision/3D_image_classification/
def get_3DCNNmodel(width=128, height=128, depth=64, name='3dcnn'):
"""Build a 3D convolutional neural network model."""
inputs = tf.keras.Input((width, height, depth, 1))
x = tf.keras.layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.GlobalAveragePooling3D()(x)
x = tf.keras.layers.Dense(units=512, activation="relu")(x)
x = tf.keras.layers.Dropout(0.3)(x)
#outputs = tf.keras.layers.Dense(units=1, activation="sigmoid")(x)
output = keras.layers.Dense(2, activation="softmax")(x)
#model = tf.keras.Model(inputs, outputs, name=name)
#initial_learning_rate = 0.0001
#lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
# initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
#)
#model.compile(
# loss="binary_crossentropy",
# optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
# metrics=["acc"],
#)
model = keras.Model(inputs, output)
initial_learning_rate = 0.0001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate,
decay_steps=100000,
decay_rate=0.96,
staircase=True
)
roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')
model.compile(
loss="categorical_crossentropy",
optimizer=keras.optimizers.Adam(),
metrics=[roc_auc],
)
return model
```
%% Cell type:code id: tags:
``` python
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor="val_roc_auc", mode='max', patience=10) #patience=10
```
%% Cell type:code id: tags:
``` python
auc_list = []
best_auc = float("-inf")
for i in tqdm(range(20)):
checkpoint_filepath = "best_model_"+str(i)+".h5"
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=False,
monitor="val_roc_auc",
mode="max",
save_best_only=True,
save_freq="epoch",
verbose=0,
)
X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid = train_test_split(X, y, trainidt, test_size=0.2, random_state=i)
X_train = tf.expand_dims(X_train, axis=-1)
X_valid = tf.expand_dims(X_valid, axis=-1)
y_train = to_categorical(y_train)
y_valid = to_categorical(y_valid)
model = get_3DCNNmodel()
history = model.fit(x=X_train, y = y_train, epochs=40, batch_size = 2,
callbacks=[model_checkpoint_cb, early_stopping_cb],
validation_data=(X_valid, y_valid), verbose=0)
model_best = tf.keras.models.load_model(filepath=checkpoint_filepath)
y_pred = model_best.predict(X_valid,batch_size = 2)
pred = np.argmax(y_pred, axis=1)
result = pd.DataFrame(trainidt_valid)
result[1] = pred
result.columns = ["BraTS21ID", "MGMT_value"]
result2 = result.groupby("BraTS21ID", as_index=False).mean()
result2 = result2.merge(train_df, on="BraTS21ID")
auc = roc_auc_score(
result2.MGMT_value_y,
result2.MGMT_value_x,
)
print(f"Validation AUC={auc}")
auc_list.append(auc)
if auc > best_auc:
best_i = i
best_auc = auc
```
%% Cell type:code id: tags:
``` python
plt.hist(auc_list)
plt.xlabel("AUC")
plt.ylabel("No. of trials")
plt.title(f"Mean AUC = {np.mean(auc_list)}")
plt.show()
```
%% Cell type:code id: tags:
``` python
checkpoint_filepath = "best_model_"+str(best_i)+".h5"
print(f"Using {checkpoint_filepath} with AUC = {best_auc}.")
model_best = tf.keras.models.load_model(filepath=checkpoint_filepath)
y_pred = model_best.predict(X_test,batch_size = 2)
pred = np.argmax(y_pred, axis=1) #
result = pd.DataFrame(testidt)
result[1] = pred
pred
```
%% Cell type:code id: tags:
``` python
result.columns=['BraTS21ID','MGMT_value']
result2 = result.groupby('BraTS21ID',as_index=False).mean()
result2['BraTS21ID'] = sample_submission['BraTS21ID']
result2['MGMT_value'] = result2['MGMT_value'].apply(lambda x:round(x*10)/10)
result2.to_csv('submission.csv',index=False)
result2
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment