diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..11c25cb --- /dev/null +++ b/.gitignore @@ -0,0 +1,274 @@ + +# Created by https://www.gitignore.io/api/osx,linux,matlab,python,pycharm+all,intellij+all,jupyternotebook + +### Intellij+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/dictionaries + +# Sensitive or high-churn files: +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.xml +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml + +# Gradle: +.idea/**/gradle.xml +.idea/**/libraries + +# CMake +cmake-build-debug/ + +# Mongo Explorer plugin: +.idea/**/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Ruby plugin and RubyMine +/.rakeTasks + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +### Intellij+all Patch ### +# Ignores the whole idea folder +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +### JupyterNotebook ### +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ +# +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### Matlab ### +##--------------------------------------------------- +## Remove autosaves generated by the Matlab editor +## We have git for backups! 
+##--------------------------------------------------- + +# Windows default autosave extension +*.asv + +# OSX / *nix default autosave extension +*.m~ + +# Compiled MEX binaries (all platforms) +*.mex* + +# Simulink Code Generation +slprj/ + +# Session info +octave-workspace + +# Simulink autosave extension +*.autosave + +### OSX ### +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### PyCharm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: + +# Sensitive or high-churn files: + +# Gradle: + +# CMake + +# Mongo Explorer plugin: + +## File-based project format: + +## Plugin-specific files: + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Ruby plugin and RubyMine + +# Crashlytics plugin (for Android Studio and IntelliJ) + +### PyCharm+all Patch ### +# Ignores the whole idea folder +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +.pytest_cache/ +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule.* + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + + +# End of https://www.gitignore.io/api/osx,linux,matlab,python,pycharm+all,intellij+all,jupyternotebook + + diff --git a/FastNonLocalMeans3D.m b/FastNonLocalMeans3D.m new file mode 100644 index 0000000..34eaf39 --- /dev/null +++ b/FastNonLocalMeans3D.m @@ -0,0 +1,300 @@ +function out = FastNonLocalMeans3D( V, sigma, beta, rs, rc, ps, flag, block ) +% A fast implementation of the non-local means based on distances in +% the features space. The full algorithm is discussed in detail in the +% following paper: +% +% A. Tristán-Vega, V. García-Pérez, S. Aja-Fernández, C.-F. Westin +% "Efficient and robust nonlocal means denoising of MR data based on +% salient features matching" +% Computer Methods and Programs in Biomedicine, vol. 105, pp. 
131-144
+%   (2012)
+%
+% If you are willing to use this software for your research, please cite
+% this work.
+%
+% NOTE: Some of the computational features described in the paper above
+% cannot be exploited in the matlab implementation. If performance is an
+% issue for you, we strongly encourage you to use the C++/ITK implementation
+% available at: http://www.nitrc.org/projects/unlmeans, for which both
+% source code and pre-compiled executables can be downloaded.
+%
+% USAGE: out = FastNonLocalMeans3D( V, sigma [, beta, rs, rc, ps, flag, block] )
+%
+%   V:      The input volume to be filtered (3D). - MANDATORY
+%   sigma:  The noise power in the input image. In the Gaussian case, this
+%           is the standard deviation of the Gaussian noise at each pixel.
+%           In the Rician case, it is the standard deviation of noise in
+%           the original, Gaussian distributed, real and imaginary parts of
+%           the signal (whose modulus is computed to get the Rician
+%           variable). - MANDATORY
+%   beta:   The filtering parameter. The larger its value, the more
+%           aggressive the filtering. The smaller its value, the better
+%           details are preserved. It should be in the range of 0.8 to 1.2
+%           for best performance (Default: 1.0).
+%   rs:     A 3x1 vector with the search radii (Default: 2,2,2).
+%   rc:     A 3x1 vector with the comparison radii (Default: 1,1,1).
+%   ps:     The preselection threshold. All those pixels in the search
+%           window whose normalized distance to the center pixel is larger
+%           than this value are automatically removed from the weighted
+%           average (Default: 2.0).
+%   flag:   Must be either 'gaussian' (the default) or 'rician'. In the
+%           latter case, the weighted average is performed over the squared
+%           pixels, and the filtered value is computed as
+%           sqrt(mu-2·sigma^2) so that the estimate becomes unbiased.
+%   block:  This second flag tells the algorithm if the computation of the
+%           weights within the search window must be done with a loop (0,
+%           the default since it seems to be faster for the default search
+%           window) or it must be done with vector operations (1). Choose 0
+%           with small search radii or 1 with larger search radii.
+%
+%   out:    The filtered volume.
+
+if( nargin<2 )
+    error('At least the input volume and the noise power must be provided');
+end
+
+if( nargin<3 )
+    beta = 1.0;
+end
+h = beta*sigma;
+
+if( nargin<4 )
+    rs = [2;2;2];
+else
+    if( length(rs)~=3 )
+        rs = rs(1).*ones(3,1);
+    end
+end
+
+if( nargin<5 )
+    rc = [1;1;1];
+else
+    if( length(rc)~=3 )
+        rc = rc(1).*ones(3,1);
+    end
+end
+
+if( nargin<6 )
+    ps = 2.0;
+end
+
+if( nargin<7 )
+    FLAG = 0;
+else
+    if( strcmpi('gaussian',flag) )
+        FLAG = 0;
+    elseif( strcmpi('rician',flag) )
+        FLAG = 1;
+    else
+        error(['Unknown filtering type: ',flag]);
+    end
+end
+
+if( nargin<8 )
+    block = 0;
+end
+
+% Compute the size of the image:
+[Y,X,Z] = size(V);
+
+% Compute the features map:
+[mu,Gx,Gy,Gz,factors,hcorr] = ComputeLocalFeatures3D( V, rc );
+
+% Compute the effective value of h as described in the paper:
+h = hcorr*h;
+
+% Initialize the output:
+out = zeros(Y,X,Z);
+
+% Loop along the pixels:
+for x=1:X
+    for y=1:Y
+        for z=1:Z
+            % We are filtering the pixel (x,y,z). First, create a
+            % neighborhood around this pixel checking for out-of-bound
+            % indices:
+            mx = max( x-rs(1), 1 );
+            MX = min( x+rs(1), X );
+            my = max( y-rs(2), 1 );
+            MY = min( y+rs(2), Y );
+            mz = max( z-rs(3), 1 );
+            MZ = min( z+rs(3), Z );
+            % Keep the center values:
+            mu0 = mu(y,x,z);
+            gx0 = Gx(y,x,z);
+            gy0 = Gy(y,x,z);
+            gz0 = Gz(y,x,z);
+            if( block==1 )
+                % VECTOR IMPLEMENTATION (SEEMS TO BE SLOWER):
+                % Get the values of the pixels in the whole search neighborhood:
+                vals = V(my:MY,mx:MX,mz:MZ);
+                % Get the mean values and gradients of the pixels in the whole
+                % search neighborhood:
+                mui = mu(my:MY,mx:MX,mz:MZ);
+                gxi = Gx(my:MY,mx:MX,mz:MZ);
+                gyi = Gy(my:MY,mx:MX,mz:MZ);
+                gzi = Gz(my:MY,mx:MX,mz:MZ);
+                % Compute the distances:
+                dists = (mui-mu0).*(mui-mu0) + ...
+                    (gxi-gx0).*(gxi-gx0)*factors(1) + ...
+                    (gyi-gy0).*(gyi-gy0)*factors(2) + ...
+                    (gzi-gz0).*(gzi-gz0)*factors(3);
+                % Normalize the distances:
+                dists = dists./(h*h);
+                % Compute the weights:
+                wis = exp(-dists);
+                % Set to 0 the normalized distances above the threshold to
+                % execute pre-selection:
+                wis(dists>ps) = 0;
+                % Avoid over-weighting of the central pixel:
+                wis(wis>0.367879441171442) = 0.367879441171442;
+                % Compute the normalization factor:
+                NORM = sum(wis(:));
+                % Filter the pixel; average the pixels or their squared values
+                % depending on the filtering type:
+                if( FLAG==0 ) % Gaussian
+                    pixel = sum(wis(:).*vals(:));
+                else % Rician
+                    pixel = sum(wis(:).*vals(:).*vals(:));
+                end
+            else
+                % LOOP IMPLEMENTATION (SEEMS TO BE FASTER):
+                pixel = 0.0;
+                NORM = 0.0;
+                for s=mx:MX
+                    for t=my:MY
+                        for u=mz:MZ
+                            % Get the current features:
+                            mui = mu(t,s,u);
+                            gxi = Gx(t,s,u);
+                            gyi = Gy(t,s,u);
+                            gzi = Gz(t,s,u);
+                            % Compute the distance and normalize:
+                            dist = (mu0-mui)*(mu0-mui) + ...
+                                (gx0-gxi)*(gx0-gxi)*factors(1) + ...
+                                (gy0-gyi)*(gy0-gyi)*factors(2) + ...
+                                (gz0-gzi)*(gz0-gzi)*factors(3);
+                            dist = dist/(h*h);
+                            % Compute the weight in case the distance is below
+                            % the pre-selection threshold, otherwise set to 0:
+                            if( dist<ps )
+                                dist = exp(-dist);
+                            else
+                                dist = 0.0;
+                            end
+                            % Avoid over-weighting of the central pixel:
+                            if( dist>0.367879441171442 )
+                                dist = 0.367879441171442;
+                            end
+                            % Add to the current value. Average the pixels or
+                            % their squared values depending on the filtering
+                            % type:
+                            if( FLAG==0 ) % Gaussian
+                                pixel = pixel + dist * V(t,s,u);
+                            else %Rician
+                                pixel = pixel + dist * V(t,s,u) * V(t,s,u);
+                            end
+                            % Store the normalization:
+                            NORM = NORM + dist;
+                        end
+                    end
+                end
+            end
+            % Normalize the pixel. If we are in the Rician case, we need
+            % also to remove the bias:
+            if( FLAG==0 ) % Gaussian
+                pixel = pixel/NORM;
+            else % Rician
+                pixel = sqrt(max(pixel/NORM-2*sigma*sigma,0));
+            end
+            % Set the output pixel:
+            out(y,x,z) = pixel;
+        end
+    end
+end
+return;
+
+%--------------------------------------------------------------------------
+function [mu,Gx,Gy,Gz,factors,hcorr] = ComputeLocalFeatures3D( I, radii )
+% Computes the local mean value and the local gradients of a 3D image.
+%
+% I:       the input image
+% radii:   a 3x1 vector of integers with the size of the neighborhood used
+%          to compute the local values. Gaussian windows are generated
+%          for each dimension as gausswin(2*radii(d)+1). If not
+%          provided, [x=1;y=1;z=1] will be assumed
+% mu:      A 3D image, the same size as I, with local mean.
+% Gx:      A 3D image, the same size as I, with the gradient in the 'x'
+%          direction (dimension 2 in matlab).
+% Gy:      A 3D image, the same size as I, with the gradient in the 'y'
+%          direction (dimension 1 in matlab).
+% Gz:      A 3D image, the same size as I, with the gradient in the 'z'
+%          direction (dimension 3 in matlab).
+% factors: a 3x1 vector with the factors to be applied to each gradient
+%          difference to estimate patch distances.
+% hcorr:   the effective reduction in the amount of noise in the
+%          distances between patches because of the fitting.
+
+I = double(I);
+
+% Check if the radii were provided:
+if( nargin<2 )
+    radii = [1;1;1];
+else
+    if( length(radii) ~= 3 )
+        radii = ones(3,1)*radii(1);
+    end
+end
+
+% Create the gaussian windows for each direction:
+gx = gausswin( 2*radii(1) + 1 ); gx = gx./sum(gx);
+gy = gausswin( 2*radii(2) + 1 ); gy = gy./sum(gy);
+gz = gausswin( 2*radii(3) + 1 ); gz = gz./sum(gz);
+
+% Compute the local mean:
+mu = My3DConv( I, gx, gy, gz );
+
+% Create the differentiation kernels:
+gdx = (-radii(1):radii(1))';
+gdx = (gdx.*gx)./sum(gdx.*gdx.*gx);
+gdy = (-radii(2):radii(2))';
+gdy = (gdy.*gy)./sum(gdy.*gdy.*gy);
+gdz = (-radii(3):radii(3))';
+gdz = (gdz.*gz)./sum(gdz.*gdz.*gz);
+
+% Create each gradient image (the minus sign is for consistency with the
+% implementation of matlab's 'gradient' function):
+Gx = -My3DConv( I, gdx, gy, gz );
+Gy = -My3DConv( I, gx, gdy, gz );
+Gz = -My3DConv( I, gx, gy, gdz );
+
+% Compute the scaling factors:
+factors(1) = sum( (-radii(1):radii(1)).*(-radii(1):radii(1)).*gx' );
+factors(2) = sum( (-radii(2):radii(2)).*(-radii(2):radii(2)).*gy' );
+factors(3) = sum( (-radii(3):radii(3)).*(-radii(3):radii(3)).*gz' );
+
+% Compute the correction in the h factor. First, compute the 'X' matrix:
+[x,y,z] = meshgrid( -radii(1):radii(1), ...
+    -radii(2):radii(2), ...
+    -radii(3):radii(3) );
+X = [ ones(size(x(:))), ...
+    x(:), y(:), z(:), ...
+    x(:).*x(:)/2, y(:).*y(:)/2, z(:).*z(:)/2, ...
+    x(:).*y(:), x(:).*z(:), y(:).*z(:) ];
+[g1,g2,g3] = meshgrid( gx, gy, gz );
+R = g1(:).*g2(:).*g3(:);
+hcorr = sqrt(trace(diag(R)*X*(X'*X)^(-1)*X'));
+return;
+
+%--------------------------------------------------------------------------
+function out = My3DConv( I, gx, gy, gz )
+% Computes a separable 3D convolution
+gx = gx(:);
+gx = permute(gx,[2,1,3]);
+gy = gy(:);
+gz = gz(:);
+gz = permute(gz,[3,2,1]);
+I = convn( I, gx, 'same' );
+I = convn( I, gy, 'same' );
+out = convn( I, gz, 'same' );
+return;
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3731cad
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+# thyroid-us
+
+Code repository for *Evaluation of Thyroid Nodules Seen on Ultrasound: Comparison of Deep Learning to Radiologists Using ACR TI-RADS*.
+
+It contains the multi-task CNN model definition, training, and inference scripts.
+
+![Multi-task CNN](./multitask.png)
+
+We used the Keras framework with a TensorFlow backend.
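As a quick orientation, below is a minimal inference sketch. It assumes a checkpoint saved by `train.py` under the path used in `test.py`, loads it with the `focal_loss_fixed` custom object exactly as `test.py` does, and uses a random array only as a stand-in for a preprocessed 160×160 nodule crop scaled to [-1, 1] as in `data.py`:

```
import numpy as np
from keras.models import load_model

from focal_loss import focal_loss

# Checkpoint path follows train.py/test.py; adjust to your setup.
net = load_model(
    "/data/test/checkpoints/weights.h5",
    custom_objects={"focal_loss_fixed": focal_loss()},
)

# Stand-in for a cropped nodule: 160x160 grayscale scaled to [-1, 1] (see data.py).
image = np.random.rand(160, 160, 1).astype(np.float32)
x = (image - 0.5) * 2.0

outputs = net.predict(np.expand_dims(x, axis=0))
print("malignancy score: {:.3f}".format(outputs[0][0, 0]))  # first output head
```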
+ +If you use this code in your research, consider citing: + +``` +@article{buda2019evaluation, + title={Evaluation of Thyroid Nodules Seen on Ultrasound: Comparison of Deep Learning to Radiologists Using ACR TI-RADS}, + author={Buda, Mateusz and Wildman-Tobriner, Benjamin and Hoang, Jenny K and Thayer, David and Tessler, Franklin N and Middleton, William D and Mazurowski, Maciej A}, + journal={Radiology}, + year={2019}, + publisher={Radiological Society of North America} +} +``` + +ROC curves comparing our method to radiologists for evaluating malignancy on the test set of 99 cases: + +![Test ROC](./roc_test.png) diff --git a/crop_images.m b/crop_images.m new file mode 100644 index 0000000..f346b7d --- /dev/null +++ b/crop_images.m @@ -0,0 +1,123 @@ +function [ ] = crop_images( images_regex, cals_path, target_dim ) +%CROP_IMAGES Crops nodules to square bounding box defined by callipers and +%resizes them to given dimensions. + +if nargin < 1 + images_regex = '/data/images-cv/*.PNG'; +end +if nargin < 2 + cals_path = '/data/detection/Calipers-cv'; +end +if nargin < 3 + target_dim = 160; +end + +margin = 32; + +images_dir = dir(images_regex); + +for i = 1:numel(images_dir) + + img_path = fullfile(images_dir(i).folder, images_dir(i).name); + cal_filename = strrep(images_dir(i).name, 'PNG', 'csv'); + cal_path = fullfile(cals_path, cal_filename); + + cal = csvread(cal_path); + cal = cal(:, 1:2); + + image = rgbread(img_path); + + image = medfilt3(image); + + image = crop2bbox(image, cal, margin); + + image = pad2square(image); + + image = imresize(image, [target_dim target_dim]); + + lims = [0.01 0.99]; + image = imadjust(image, stretchlim(image, lims), []); + + image = FastNonLocalMeans3D(double(image)/255, 0.1); + image = uint8(image * 255); + + imwrite(image, img_path); + +end + +end + + +function [ padded ] = pad2square( img ) + + if size(img, 1) == size(img, 2) + padded = img; + return; + end + + if size(img, 1) < size(img, 2) + ypad_post = ceil((size(img, 2) - size(img, 1)) / 2.0); + ypad_pre = floor((size(img, 2) - size(img, 1)) / 2.0); + xpad_post = 0; + xpad_pre = 0; + else + xpad_post = ceil((size(img, 1) - size(img, 2)) / 2.0); + xpad_pre = floor((size(img, 1) - size(img, 2)) / 2.0); + ypad_post = 0; + ypad_pre = 0; + end + + padded = padarray(img, [ypad_post xpad_post], 0, 'post'); + padded = padarray(padded, [ypad_pre xpad_pre], 0, 'pre'); + +end + +function [ cropped ] = crop2bbox( img, cals, margin ) + + height = size(img, 1); + width = size(img, 2); + + if size(cals, 1) <= 2 + center = [min(cals(:, 1)) + abs(cals(1, 1) - cals(2, 1)) / 2; ... 
+                  min(cals(:, 2)) + abs(cals(1, 2) - cals(2, 2)) / 2];
+        R = [cosd(90) -sind(90); sind(90) cosd(90)];
+        cals = padarray(cals, [2 0], 1, 'post');
+        cals(3, 1:2) = (R * (cals(1, 1:2)' - center) + center)';
+        cals(4, 1:2) = (R * (cals(2, 1:2)' - center) + center)';
+    end
+
+    ymin = max(1, min(cals(:, 1)) - margin);
+    ymax = min(max(cals(:, 1)) + margin, height);
+    xmin = max(1, min(cals(:, 2)) - margin);
+    xmax = min(max(cals(:, 2)) + margin, width);
+
+    ymid = (ymax + ymin) / 2;
+    xmid = (xmax + xmin) / 2;
+
+    box_size = max((ymax - ymin), (xmax - xmin)) / 2;
+    box_size = max(box_size, 80);
+
+    ymin = round(max(1, ymid - box_size));
+    ymax = round(min(ymid + box_size, height));
+    xmin = round(max(1, xmid - box_size));
+    xmax = round(min(xmid + box_size, width));
+
+    cropped = img(ymin:ymax, xmin:xmax);
+
+end
+
+function [ rgb ] = rgbread( img_path )
+%RGBREAD Reads image from given path and transforms it to RGB image if
+%needed
+
+[img, map] = imread(img_path);
+
+if ~isempty(map)
+    rgb = ind2rgb(img, map);
+else
+    if size(img, 3) == 1
+        rgb = cat(3, img, img, img);
+    else
+        rgb = img;
+    end
+end
+
+end
diff --git a/data.py b/data.py
new file mode 100644
index 0000000..ff93b28
--- /dev/null
+++ b/data.py
@@ -0,0 +1,414 @@
+import numpy as np
+import os
+import pandas as pd
+from glob import glob
+from imgaug import augmenters
+from random import seed, randint
+from scipy.misc import imread
+
+data_path = "./data.csv"
+images_dir = "/data/images-cv"
+test_images_dir = "/data/images-test"
+
+random_seed = 3
+total_folds = 10
+
+
+def feature_classes(feature):
+    df = pd.read_csv(data_path)
+    df.fillna(0, inplace=True)
+    df.Calcs1.replace(0, "None", inplace=True)
+    if feature == "composition":
+        return list(pd.get_dummies(df.Composition, prefix="", prefix_sep="").columns)
+    if feature == "echogenicity":
+        return list(pd.get_dummies(df.Echogenicity, prefix="", prefix_sep="").columns)
+    if feature == "shape":
+        return ["wider", "taller"]
+    if feature == "calcification":
+        return list(pd.get_dummies(df.Calcs1, prefix="", prefix_sep="").columns)
+    if feature == "margin":
+        return list(pd.get_dummies(df.MargA, prefix="", prefix_sep="").columns)
+    return []
+
+
+def fold_pids(fold, test=True):
+    # get patient IDs for the held-out (validation) split of the given fold
+    # in 10-fold cross-validation; with test=False, the training IDs are returned
+    df = pd.read_csv(data_path)
+    all_files = glob(os.path.join(images_dir, "*.PNG"))
+    val_ids = validation_ids(fold, df[["ID", "Cancer"]])
+    pids = []
+    for f_path in all_files:
+        pid = fname2pid(f_path)
+        if (test and pid in val_ids) or (not test and pid not in val_ids):
+            pids.append(pid)
+    return pids
+
+
+def test_pids():
+    # get patient IDs for test cases
+    test_files = sorted(glob(os.path.join(test_images_dir, "*.PNG")))
+    pids = []
+    for f_path in test_files:
+        pids.append(fname2pid(f_path))
+    return pids
+
+
+def train_pids():
+    # get patient IDs for training cases
+    train_files = sorted(glob(images_dir + "/*.PNG"))
+    pids = []
+    for f_path in train_files:
+        pids.append(fname2pid(f_path))
+    return pids
+
+
+def train_data():
+    # get images and labels for training cases
+    df = pd.read_csv(data_path)
+    df.fillna(0, inplace=True)
+    df.Calcs1.replace(0, "None", inplace=True)
+
+    df_cancer = df[["ID", "Cancer"]]
+    df_compos = pd.concat([df.ID, pd.get_dummies(df.Composition)], axis=1)
+    df_echo = pd.concat([df.ID, pd.get_dummies(df.Echogenicity)], axis=1)
+    df_shape = df[["ID", "Shape"]]
+    df_shape["Shape"] = df_shape.apply(lambda row: 1 if row.Shape == "y" else 0, axis=1)
+    df_calcs = pd.concat([df.ID,
pd.get_dummies(df.Calcs1)], axis=1) + df_margin = pd.concat([df.ID, pd.get_dummies(df.MargA)], axis=1) + + train_files = sorted(glob(os.path.join(images_dir, "*.PNG"))) + X_train = [] + + # labels for malignancy and 5 TI-RADS features + y_train_cancer = [] + y_train_compos = [] + y_train_echo = [] + y_train_shape = [] + y_train_calcs = [] + y_train_margin = [] + + for f_path in train_files: + pid = fname2pid(f_path) + X_train.append( + np.expand_dims( + np.array(imread(f_path, flatten=False, mode="F")).astype(np.float32), + axis=-1, + ) + ) + y_train_cancer.append( + df_cancer[df_cancer.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_train_compos.append( + df_compos[df_compos.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_train_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + # for shape feature, only assign positive label to transversal view + if "trans" in f_path: + y_train_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_train_shape.append(np.array([0]).astype(np.float32)) + y_train_calcs.append( + df_calcs[df_calcs.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_train_margin.append( + df_margin[df_margin.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + + X_train = np.array(X_train) + + # normalize + X_train /= 255. + X_train -= 0.5 + X_train *= 2. + + y_train = { + "out_cancer": np.array(y_train_cancer), + "out_compos": np.array(y_train_compos), + "out_echo": np.array(y_train_echo), + "out_shape": np.array(y_train_shape), + "out_calcs": np.array(y_train_calcs), + "out_margin": np.array(y_train_margin), + } + + return X_train, y_train + + +def test_data(): + # get images and labels for test cases + df = pd.read_csv(data_path) + df.fillna(0, inplace=True) + df.Calcs1.replace(0, "None", inplace=True) + + df_cancer = df[["ID", "Cancer"]] + df_compos = pd.concat([df.ID, pd.get_dummies(df.Composition)], axis=1) + df_echo = pd.concat([df.ID, pd.get_dummies(df.Echogenicity)], axis=1) + df_shape = df[["ID", "Shape"]] + df_shape["Shape"] = df_shape.apply(lambda row: 1 if row.Shape == "y" else 0, axis=1) + df_calcs = pd.concat([df.ID, pd.get_dummies(df.Calcs1)], axis=1) + df_margin = pd.concat([df.ID, pd.get_dummies(df.MargA)], axis=1) + + test_files = sorted(glob(test_images_dir + "/*.PNG")) + + X_test = [] + + # labels for malignancy and 5 TI-RADS features + y_test_cancer = [] + y_test_compos = [] + y_test_echo = [] + y_test_shape = [] + y_test_calcs = [] + y_test_margin = [] + + for f_path in test_files: + pid = fname2pid(f_path) + X_test.append( + np.expand_dims( + np.array(imread(f_path, flatten=False, mode="F")).astype(np.float32), + axis=-1, + ) + ) + y_test_cancer.append( + df_cancer[df_cancer.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_test_compos.append( + df_compos[df_compos.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_test_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + if "trans" in f_path: + y_test_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_test_shape.append(np.array([0]).astype(np.float32)) + y_test_calcs.append( + df_calcs[df_calcs.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_test_margin.append( + df_margin[df_margin.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + + X_test = np.array(X_test) + + # normalize + X_test /= 255. 
+ X_test -= 0.5 + X_test *= 2. + + y_test = [ + np.array(y_test_cancer), + np.array(y_test_compos), + np.array(y_test_echo), + np.array(y_test_shape), + np.array(y_test_calcs), + np.array(y_test_margin), + ] + + return X_test, y_test + + +def fold_data(fold): + # get images and labels for given fold + df = pd.read_csv(data_path) + df.fillna(0, inplace=True) + df.Calcs1.replace(0, "None", inplace=True) + + df_cancer = df[["ID", "Cancer"]] + df_compos = pd.concat([df.ID, pd.get_dummies(df.Composition)], axis=1) + df_echo = pd.concat([df.ID, pd.get_dummies(df.Echogenicity)], axis=1) + df_shape = df[["ID", "Shape"]] + df_shape["Shape"] = df_shape.apply(lambda row: 1 if row.Shape == "y" else 0, axis=1) + df_calcs = pd.concat([df.ID, pd.get_dummies(df.Calcs1)], axis=1) + df_margin = pd.concat([df.ID, pd.get_dummies(df.MargA)], axis=1) + + all_files = glob(images_dir + "/*.PNG") + val_ids = validation_ids(fold, df_cancer) + + X_train = [] + X_test = [] + + # labels for malignancy and 5 TI-RADS features + y_train_cancer = [] + y_train_compos = [] + y_train_echo = [] + y_train_shape = [] + y_train_calcs = [] + y_train_margin = [] + y_test_cancer = [] + y_test_compos = [] + y_test_echo = [] + y_test_shape = [] + y_test_calcs = [] + y_test_margin = [] + + for f_path in all_files: + pid = fname2pid(f_path) + image = np.expand_dims( + np.array(imread(f_path, flatten=False, mode="F")).astype(np.float32), + axis=-1, + ) + if pid in val_ids: + X_test.append(image) + y_test_cancer.append( + df_cancer[df_cancer.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_test_compos.append( + df_compos[df_compos.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_test_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + if "trans" in f_path: + y_test_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_test_shape.append(np.array([0]).astype(np.float32)) + y_test_calcs.append( + df_calcs[df_calcs.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_test_margin.append( + df_margin[df_margin.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + X_train.append(image) + y_train_cancer.append( + df_cancer[df_cancer.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_train_compos.append( + df_compos[df_compos.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_train_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + if "trans" in f_path: + y_train_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_train_shape.append(np.array([0]).astype(np.float32)) + y_train_calcs.append( + df_calcs[df_calcs.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_train_margin.append( + df_margin[df_margin.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + + X_train = np.array(X_train) + X_test = np.array(X_test) + + # normalize + X_train /= 255. + X_train -= 0.5 + X_train *= 2. + X_test /= 255. + X_test -= 0.5 + X_test *= 2. 
+ + y_train = { + "out_cancer": np.array(y_train_cancer), + "out_compos": np.array(y_train_compos), + "out_echo": np.array(y_train_echo), + "out_shape": np.array(y_train_shape), + "out_calcs": np.array(y_train_calcs), + "out_margin": np.array(y_train_margin), + } + + y_test = [ + np.array(y_test_cancer), + np.array(y_test_compos), + np.array(y_test_echo), + np.array(y_test_shape), + np.array(y_test_calcs), + np.array(y_test_margin), + ] + + return X_train, y_train, X_test, y_test + + +def augment(X): + # data augmentation + seq = augmenters.Sequential( + [ + augmenters.Fliplr(0.5), + augmenters.Flipud(0.5), + augmenters.Affine(rotate=(-15, 15)), + augmenters.Affine(shear=(-15, 15)), + augmenters.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}), + augmenters.Affine(scale=(0.9, 1.1)), + ] + ) + return seq.augment_images(X) + + +def validation_ids(fold, df_cancer): + # get patient IDs in given fold + pid_set = set() + all_files = glob(images_dir + "/*.PNG") + for f_path in all_files: + pid = fname2pid(f_path) + pid_set.add(pid) + + val_ids = [] + + # set random seed to get the same split every time + seed(random_seed) + # stratified split + malignant_fold = 0 + for pid in sorted(pid_set): + label = df_cancer[df_cancer.ID == pid].as_matrix().flatten()[1:] + if label == 1: + if fold == np.mod(malignant_fold, total_folds): + val_ids.append(pid) + malignant_fold += 1 + else: + if fold == randint(0, total_folds - 1): + val_ids.append(pid) + + return val_ids + + +def fname2pid(fname): + # get patient ID from image file name + return fname.split("/")[-1].split(".")[0].lstrip("0") diff --git a/focal_loss.py b/focal_loss.py new file mode 100644 index 0000000..064f4ae --- /dev/null +++ b/focal_loss.py @@ -0,0 +1,11 @@ +import tensorflow as tf +from keras import backend as K + + +def focal_loss(gamma=2, alpha=2): + + def focal_loss_fixed(y_true, y_pred): + pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred) + return -K.sum(alpha * K.pow(1. 
- pt, gamma) * K.log(pt + 1e-6), axis=-1) + + return focal_loss_fixed diff --git a/model.py b/model.py new file mode 100644 index 0000000..33adae3 --- /dev/null +++ b/model.py @@ -0,0 +1,113 @@ +from keras.initializers import Constant +from keras.layers import Input, Conv2D, Flatten, Activation, MaxPool2D, Dropout +from keras.models import Model + +from focal_loss import focal_loss + +img_width, img_height = 160, 160 + +loss_dict = { + "out_cancer": focal_loss(), + "out_compos": focal_loss(), + "out_echo": focal_loss(), + "out_shape": focal_loss(), + "out_calcs": focal_loss(), + "out_margin": focal_loss(), +} + +loss_weights_dict = { + "out_cancer": 1.0, + "out_compos": 1.0, + "out_echo": 1.0, + "out_shape": 1.0, + "out_calcs": 1.0, + "out_margin": 1.0, +} + + +def multitask_cnn(): + # 160x160x1 + input_tensor = Input(shape=(img_height, img_width, 1), name="thyroid_input") + # 160x160x8 + x = Conv2D(8, (3, 3), padding="same", activation="relu")(input_tensor) + # 80x80x8 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 80x80x12 + x = Conv2D(12, (3, 3), padding="same", activation="relu")(x) + # 40x40x12 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 40x40x16 + x = Conv2D(16, (3, 3), padding="same", activation="relu")(x) + # 20x20x16 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 20x20x24 + x = Conv2D(24, (3, 3), padding="same", activation="relu")(x) + # 10x10x24 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 10x10x32 + x = Conv2D(32, (3, 3), padding="same", activation="relu")(x) + # 5x5x32 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 5x5x48 + x = Conv2D(48, (3, 3), padding="same", activation="relu")(x) + # 5x5x48 + x = Dropout(0.5)(x) + + y_cancer = Conv2D( + filters=1, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_cancer = Flatten()(y_cancer) + y_cancer = Activation("sigmoid", name="out_cancer")(y_cancer) + + y_compos = Conv2D( + filters=5, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_compos = Flatten()(y_compos) + y_compos = Activation("softmax", name="out_compos")(y_compos) + + y_echo = Conv2D( + filters=5, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_echo = Flatten()(y_echo) + y_echo = Activation("softmax", name="out_echo")(y_echo) + + y_shape = Conv2D( + filters=1, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_shape = Flatten()(y_shape) + y_shape = Activation("sigmoid", name="out_shape")(y_shape) + + y_calcs = Conv2D( + filters=5, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_calcs = Flatten()(y_calcs) + y_calcs = Activation("softmax", name="out_calcs")(y_calcs) + + y_margin = Conv2D( + filters=4, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_margin = Flatten()(y_margin) + y_margin = Activation("softmax", name="out_margin")(y_margin) + + return Model( + inputs=[input_tensor], + outputs=[y_cancer, y_compos, y_echo, y_shape, y_calcs, y_margin], + ) diff --git a/multitask.png b/multitask.png new file mode 100644 index 0000000..3af6df9 Binary files /dev/null and b/multitask.png differ diff --git a/plots.py b/plots.py new file mode 100644 index 0000000..3afd6f2 --- /dev/null +++ b/plots.py @@ -0,0 +1,24 @@ +import matplotlib.pyplot as plt +from sklearn.metrics import roc_auc_score, roc_curve + + +def 
plot_roc(y_true, y_pred, figname="roc.png"):
+    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
+    roc_auc = roc_auc_score(y_true, y_pred)
+
+    print("roc auc = {}".format(roc_auc))
+
+    plt.rcParams.update({"font.size": 24})
+
+    fig = plt.figure(figsize=(10, 10))
+    plt.plot(fpr, tpr, color="blue", lw=2, label="ROC curve (area = %0.2f)" % roc_auc)
+    plt.plot([0, 1], [0, 1], color="gray", lw=1, linestyle="--")
+    plt.grid(color="silver", alpha=0.3, linestyle="--", linewidth=1)
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.0])
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    plt.legend(loc="lower right")
+    plt.tight_layout()
+    plt.savefig(figname, bbox_inches="tight")
+    plt.close(fig)
diff --git a/roc_test.png b/roc_test.png
new file mode 100644
index 0000000..440fe5c
Binary files /dev/null and b/roc_test.png differ
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..3fc0d83
--- /dev/null
+++ b/test.py
@@ -0,0 +1,73 @@
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import sys
+import tensorflow as tf
+from keras import backend as K
+from keras.models import load_model
+from sklearn.metrics import roc_auc_score, roc_curve
+
+from data import test_data, test_pids
+from focal_loss import focal_loss
+from plots import plot_roc
+
+checkpoints_dir = "/data/test/checkpoints/"
+batch_size = 128
+nb_categories = 1
+
+
+def predict():
+    weights_path = os.path.join(checkpoints_dir, "weights.h5")
+
+    net = load_model(weights_path, custom_objects={"focal_loss_fixed": focal_loss()})
+
+    X_test, y_test = test_data()
+
+    preds = net.predict(X_test, batch_size=batch_size, verbose=1)
+
+    return preds[0], y_test[0]
+
+
+def test():
+    predictions, targets = predict()
+
+    cases_predictions = {}
+    cases_targets = {}
+    pids = test_pids()
+    for i in range(len(pids)):
+        pid = pids[i]
+        prev_pred = cases_predictions.get(pid, np.zeros(nb_categories))
+        preds = predictions[i]
+        cases_predictions[pid] = prev_pred + preds
+        cases_targets[pid] = targets[i]
+
+    y_pred = []
+    y_true = []
+    y_id = []
+    for pid in cases_predictions:
+        y_pred.append(cases_predictions[pid][0])
+        y_true.append(cases_targets[pid])
+        y_id.append(pid)
+
+    with open("./predictions_test.csv", "w") as csvfile:
+        csvwriter = csv.writer(csvfile)
+        csvwriter.writerow(["ID", "Prediction", "Cancer"])
+        for pid, prediction, gt in zip(y_id, y_pred, y_true):
+            pid = pid.lstrip("0")
+            csvwriter.writerow([pid, prediction, gt[0]])
+
+    plot_roc(y_true, y_pred, figname="roc_test.png")
+
+
+if __name__ == "__main__":
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.allow_soft_placement = True
+    sess = tf.Session(config=config)
+    K.set_session(sess)
+
+    device = "/gpu:" + sys.argv[1]
+    with tf.device(device):
+        test()
diff --git a/test_cv.py b/test_cv.py
new file mode 100644
index 0000000..09ac27c
--- /dev/null
+++ b/test_cv.py
@@ -0,0 +1,93 @@
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import sys
+import tensorflow as tf
+from keras import backend as K
+from keras.models import load_model
+from sklearn.metrics import roc_auc_score, roc_curve
+
+from data import fold_data, fold_pids
+from focal_loss import focal_loss
+from plots import plot_roc
+
+checkpoints_dir = "/data/checkpoints/<fold>/"
+weights_file = "weights.h5"
+batch_size = 128
+nb_categories = 1
+
+
+def predict(fold):
+    fold_checkpoints_dir = checkpoints_dir.replace("<fold>", str(fold))
+    weights_path = os.path.join(fold_checkpoints_dir, weights_file)
+
+    net = load_model(weights_path,
custom_objects={"focal_loss_fixed": focal_loss()}) + + x_train, y_train, x_test, y_test = fold_data(fold) + + preds = net.predict(x_test, batch_size=batch_size, verbose=1) + y = y_test[0] + + return preds[0], y + + +def test(folds=10): + pids = [] + predictions = np.zeros((0, nb_categories)) + targets = [] + pid_fold = [] + + for f in range(folds): + preds, t = predict(f) + predictions = np.vstack((predictions, preds)) + pids.extend(fold_pids(f)) + targets.extend(t) + pid_fold.extend([f] * len(t)) + + print("{} images".format(len(pids))) + + cases_predictions = {} + cases_targets = {} + cases_folds = {} + for i in range(len(pids)): + pid = pids[i] + prev_pred = cases_predictions.get(pid, np.zeros(nb_categories)) + preds = predictions[i] + cases_predictions[pid] = prev_pred + preds + cases_targets[pid] = targets[i] + cases_folds[pid] = pid_fold[i] + + print("{} cases".format(len(cases_predictions))) + + y_pred = [] + y_true = [] + y_id = [] + y_fold = [] + for pid in cases_predictions: + y_pred.append(cases_predictions[pid][0]) + y_true.append(cases_targets[pid]) + y_id.append(pid) + y_fold.append(cases_folds[pid]) + + with open("./predictions_cv.csv", "w") as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(["ID", "Prediction", "Cancer", "Fold"]) + for pid, prediction, gt, f in zip(y_id, y_pred, y_true, y_fold): + pid = pid.lstrip("0") + csvwriter.writerow([pid, prediction, gt[0], f]) + + plot_roc(y_true, y_pred, figname="roc_cv.png") + + +if __name__ == "__main__": + + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + config.allow_soft_placement = True + sess = tf.Session(config=config) + K.set_session(sess) + + device = "/gpu:" + sys.argv[1] + with tf.device(device): + test() diff --git a/train.py b/train.py new file mode 100644 index 0000000..d656be0 --- /dev/null +++ b/train.py @@ -0,0 +1,82 @@ +import numpy as np +import os +import sys +import tensorflow as tf +from keras import backend as K +from keras.callbacks import TensorBoard +from keras.optimizers import RMSprop +from sklearn.metrics import roc_auc_score + +from data import augment, train_data, test_data +from model import multitask_cnn, loss_dict, loss_weights_dict + +checkpoints_dir = "/data/test/checkpoints/" +logs_dir = "/data/test/logs/" + +batch_size = 128 +epochs = 250 +base_lr = 0.001 + + +def train(): + if not os.path.exists(checkpoints_dir): + os.makedirs(checkpoints_dir) + if not os.path.exists(logs_dir): + os.makedirs(logs_dir) + + X_train, y_train = train_data() + X_test, y_test = test_data() + + print("Training and validation data processed.") + + model = multitask_cnn() + + optimizer = RMSprop(lr=base_lr) + model.compile( + optimizer=optimizer, + loss=loss_dict, + loss_weights=loss_weights_dict, + metrics=["accuracy"], + ) + + training_log = TensorBoard(log_dir=os.path.join(logs_dir, "log"), write_graph=False) + + callbacks = [training_log] + + for e in range(epochs): + X_train_augmented = augment(X_train) + model.fit( + {"thyroid_input": X_train_augmented}, + y_train, + validation_data=(X_test, y_test), + batch_size=batch_size, + epochs=e + 1, + initial_epoch=e, + shuffle=True, + callbacks=callbacks, + ) + + if np.mod(e + 1, 10) == 0: + y_pred = model.predict(X_train, batch_size=batch_size, verbose=1) + auc_train = roc_auc_score(y_train["out_cancer"], y_pred[0]) + y_pred = model.predict(X_test, batch_size=batch_size, verbose=1) + auc_test = roc_auc_score(y_test[0], y_pred[0]) + with open(os.path.join(logs_dir, "auc.txt"), "a") as auc_file: + 
                auc_file.write("{},{}\n".format(auc_train, auc_test))
+
+    model.save(os.path.join(checkpoints_dir, "weights.h5"))
+
+    print("Training completed.")
+
+
+if __name__ == "__main__":
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.allow_soft_placement = True
+    sess = tf.Session(config=config)
+    K.set_session(sess)
+
+    device = "/gpu:" + sys.argv[1]
+    with tf.device(device):
+        train()
diff --git a/train_cv.py b/train_cv.py
new file mode 100644
index 0000000..cb64587
--- /dev/null
+++ b/train_cv.py
@@ -0,0 +1,92 @@
+import numpy as np
+import os
+import sys
+import tensorflow as tf
+from keras import backend as K
+from keras.callbacks import TensorBoard
+from keras.optimizers import RMSprop
+from sklearn.metrics import roc_auc_score
+
+from data import fold_data, augment
+from model import multitask_cnn, loss_dict, loss_weights_dict
+
+checkpoints_dir = "/data/checkpoints/<fold>/"
+logs_dir = "/data/logs/<fold>/"
+
+batch_size = 128
+epochs = 250
+base_lr = 0.001
+
+
+def train(fold):
+    fold_checkpoints_dir = checkpoints_dir.replace("<fold>", str(fold))
+    fold_logs_dir = logs_dir.replace("<fold>", str(fold))
+
+    if not os.path.exists(fold_checkpoints_dir):
+        os.makedirs(fold_checkpoints_dir)
+    if not os.path.exists(fold_logs_dir):
+        os.makedirs(fold_logs_dir)
+
+    x_train, y_train, x_test, y_test = fold_data(fold)
+
+    print("Training and validation data processed.")
+    print("Training data size: {}".format(len(x_train)))
+    print("Test data size: {}".format(len(x_test)))
+
+    model = multitask_cnn()
+
+    optimizer = RMSprop(lr=base_lr)
+
+    model.compile(
+        optimizer=optimizer,
+        loss=loss_dict,
+        loss_weights=loss_weights_dict,
+        metrics=["accuracy"],
+    )
+
+    training_log = TensorBoard(
+        log_dir=os.path.join(fold_logs_dir, "log"), write_graph=False
+    )
+
+    callbacks = [training_log]
+
+    y_train_cancer = y_train["out_cancer"]
+    y_test_cancer = y_test[0]
+
+    for e in range(epochs):
+        x_train_augmented = augment(x_train)
+        model.fit(
+            x={"thyroid_input": x_train_augmented},
+            y=y_train,
+            validation_data=(x_test, y_test),
+            batch_size=batch_size,
+            epochs=e + 1,
+            initial_epoch=e,
+            shuffle=True,
+            callbacks=callbacks,
+        )
+
+        if np.mod(e + 1, 10) == 0:
+            y_pred = model.predict(x_train, batch_size=batch_size, verbose=1)
+            auc_train = roc_auc_score(y_train_cancer, y_pred[0])
+            y_pred = model.predict(x_test, batch_size=batch_size, verbose=1)
+            auc_test = roc_auc_score(y_test_cancer, y_pred[0])
+            with open(os.path.join(fold_logs_dir, "auc.txt"), "a") as auc_file:
+                auc_file.write("{},{}\n".format(auc_train, auc_test))
+
+    model.save(os.path.join(fold_checkpoints_dir, "weights.h5"))
+
+    print("Training fold {} completed.".format(fold))
+
+
+if __name__ == "__main__":
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.allow_soft_placement = True
+    sess = tf.Session(config=config)
+    K.set_session(sess)
+
+    device = "/gpu:" + sys.argv[1]
+    with tf.device(device):
+        train(int(sys.argv[2]))
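For reference, both `test.py` and `test_cv.py` above reduce image-level predictions to case-level scores by summing the predictions of all images that share a patient ID before computing the ROC. A condensed, self-contained sketch of that aggregation follows (the function name and toy inputs are illustrative only; the logic mirrors the scripts above):

```
import numpy as np
from sklearn.metrics import roc_auc_score


def case_level_auc(pids, image_preds, image_labels):
    # pids: patient ID per image; image_preds/image_labels: one value per image.
    # Predictions from multiple images of the same nodule are summed per case.
    case_preds, case_labels = {}, {}
    for pid, pred, label in zip(pids, image_preds, image_labels):
        case_preds[pid] = case_preds.get(pid, 0.0) + float(pred)
        case_labels[pid] = float(label)
    ids = sorted(case_preds)
    y_score = [case_preds[pid] for pid in ids]
    y_true = [case_labels[pid] for pid in ids]
    return roc_auc_score(y_true, y_score)


# Illustrative toy call with two images per case:
print(case_level_auc(["1", "1", "2", "2"], [0.7, 0.6, 0.2, 0.3], [1, 1, 0, 0]))
```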