diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..11c25cb --- /dev/null +++ b/.gitignore @@ -0,0 +1,274 @@ + +# Created by https://www.gitignore.io/api/osx,linux,matlab,python,pycharm+all,intellij+all,jupyternotebook + +### Intellij+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/dictionaries + +# Sensitive or high-churn files: +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.xml +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml + +# Gradle: +.idea/**/gradle.xml +.idea/**/libraries + +# CMake +cmake-build-debug/ + +# Mongo Explorer plugin: +.idea/**/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Ruby plugin and RubyMine +/.rakeTasks + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +### Intellij+all Patch ### +# Ignores the whole idea folder +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +### JupyterNotebook ### +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ +# +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### Matlab ### +##--------------------------------------------------- +## Remove autosaves generated by the Matlab editor +## We have git for backups! 
+##--------------------------------------------------- + +# Windows default autosave extension +*.asv + +# OSX / *nix default autosave extension +*.m~ + +# Compiled MEX binaries (all platforms) +*.mex* + +# Simulink Code Generation +slprj/ + +# Session info +octave-workspace + +# Simulink autosave extension +*.autosave + +### OSX ### +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### PyCharm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: + +# Sensitive or high-churn files: + +# Gradle: + +# CMake + +# Mongo Explorer plugin: + +## File-based project format: + +## Plugin-specific files: + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Ruby plugin and RubyMine + +# Crashlytics plugin (for Android Studio and IntelliJ) + +### PyCharm+all Patch ### +# Ignores the whole idea folder +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +.pytest_cache/ +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule.* + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + + +# End of https://www.gitignore.io/api/osx,linux,matlab,python,pycharm+all,intellij+all,jupyternotebook + + diff --git a/FastNonLocalMeans3D.m b/FastNonLocalMeans3D.m new file mode 100644 index 0000000..34eaf39 --- /dev/null +++ b/FastNonLocalMeans3D.m @@ -0,0 +1,300 @@ +function out = FastNonLocalMeans3D( V, sigma, beta, rs, rc, ps, flag, block ) +% A fast implementation of the non-local means based on distances in +% the features space. The full algorithm is discussed in detail in the +% following paper: +% +% A. Tristán-Vega, V. García-Pérez, S. Aja-Fernández, C.-F. Westin +% "Efficient and robust nonlocal means denoising of MR data based on +% salient features matching" +% Computer Methods and Programs in Biomedicine, vol. 105, pp. 
131-144
+%   (2012)
+%
+% If you are willing to use this software for your research, please cite
+% this work.
+%
+% NOTE: Some of the computational features described in the paper above
+% cannot be exploited in the matlab implementation. If performance is an
+% issue for you, we strongly encourage you to use the C++/ITK implementation
+% available at: http://www.nitrc.org/projects/unlmeans, for which both
+% source code and pre-compiled executables can be downloaded.
+%
+% USAGE: out = FastNonLocalMeans3D( V, sigma [, beta, rs, rc, ps, flag, block] )
+%
+%   V:      The input volume to be filtered (3D). - MANDATORY
+%   sigma:  The noise power in the input image. In the Gaussian case, this
+%           is the standard deviation of the Gaussian noise at each pixel.
+%           In the Rician case, it is the standard deviation of noise in
+%           the original, Gaussian distributed, real and imaginary parts of
+%           the signal (whose modulus is computed to get the Rician
+%           variable). - MANDATORY
+%   beta:   The filtering parameter. The larger its value, the more
+%           aggressive the filtering. The smaller its value, the better
+%           details are preserved. It should be in the range of 0.8 to 1.2
+%           for best performance (Default: 1.0).
+%   rs:     A 3x1 vector with the search radii (Default: 2,2,2).
+%   rc:     A 3x1 vector with the comparison radii (Default: 1,1,1).
+%   ps:     The preselection threshold. All those pixels in the search
+%           window whose normalized distance to the center pixel is larger
+%           than this value are automatically removed from the weighted
+%           average (Default: 2.0).
+%   flag:   Must be either 'gaussian' (the default) or 'rician'. In the
+%           latter case, the weighted average is performed over the squared
+%           pixels, and the filtered value is computed as
+%           sqrt(mu-2·sigma^2) so that the estimate becomes unbiased.
+%   block:  This second flag tells the algorithm if the computation of the
+%           weights within the search window must be done with a loop (0,
+%           the default since it seems to be faster for the default search
+%           window) or it must be done with vector operations (1). Choose 0
+%           with small search radii or 1 with larger search radii.
+%
+%   out:    The filtered volume.
+
+if( nargin<2 )
+    error('At least the input volume and the noise power must be provided');
+end
+
+if( nargin<3 )
+    beta = 1.0;
+end
+h = beta*sigma;
+
+if( nargin<4 )
+    rs = [2;2;2];
+else
+    if( length(rs)~=3 )
+        rs = rs(1).*ones(3,1);
+    end
+end
+
+if( nargin<5 )
+    rc = [1;1;1];
+else
+    if( length(rc)~=3 )
+        rc = rc(1).*ones(3,1);
+    end
+end
+
+if( nargin<6 )
+    ps = 2.0;
+end
+
+if( nargin<7 )
+    FLAG = 0;
+else
+    if( strcmpi('gaussian',flag) )
+        FLAG = 0;
+    elseif( strcmpi('rician',flag) )
+        FLAG = 1;
+    else
+        error(['Unknown filtering type: ',flag]);
+    end
+end
+
+if( nargin<8 )
+    block = 0;
+end
+
+% Compute the size of the image:
+[Y,X,Z] = size(V);
+
+% Compute the features map:
+[mu,Gx,Gy,Gz,factors,hcorr] = ComputeLocalFeatures3D( V, rc );
+
+% Compute the effective value of h as described in the paper:
+h = hcorr*h;
+
+% Initialize the output:
+out = zeros(Y,X,Z);
+
+% Loop along the pixels:
+for x=1:X
+    for y=1:Y
+        for z=1:Z
+            % We are filtering the pixel (x,y,z). First, create a
+            % neighborhood around this pixel checking for out-of-bound
+            % indices:
+            mx = max( x-rs(1), 1 );
+            MX = min( x+rs(1), X );
+            my = max( y-rs(2), 1 );
+            MY = min( y+rs(2), Y );
+            mz = max( z-rs(3), 1 );
+            MZ = min( z+rs(3), Z );
+            % Keep the center values:
+            mu0 = mu(y,x,z);
+            gx0 = Gx(y,x,z);
+            gy0 = Gy(y,x,z);
+            gz0 = Gz(y,x,z);
+            if( block==1 )
+                % VECTOR IMPLEMENTATION (SEEMS TO BE SLOWER):
+                % Get the values of the pixels in the whole search neighborhood:
+                vals = V(my:MY,mx:MX,mz:MZ);
+                % Get the mean values and gradients of the pixels in the whole
+                % search neighborhood:
+                mui = mu(my:MY,mx:MX,mz:MZ);
+                gxi = Gx(my:MY,mx:MX,mz:MZ);
+                gyi = Gy(my:MY,mx:MX,mz:MZ);
+                gzi = Gz(my:MY,mx:MX,mz:MZ);
+                % Compute the distances:
+                dists = (mui-mu0).*(mui-mu0) + ...
+                    (gxi-gx0).*(gxi-gx0)*factors(1) + ...
+                    (gyi-gy0).*(gyi-gy0)*factors(2) + ...
+                    (gzi-gz0).*(gzi-gz0)*factors(3);
+                % Normalize the distances:
+                dists = dists./(h*h);
+                % Compute the weights:
+                wis = exp(-dists);
+                % Set to 0 the normalized distances above the threshold to
+                % execute pre-selection:
+                wis(dists>ps) = 0;
+                % Avoid over-weighting of the central pixel:
+                wis(wis>0.367879441171442) = 0.367879441171442;
+                % Compute the normalization factor:
+                NORM = sum(wis(:));
+                % Filter the pixel; average the pixels or their squared values
+                % depending on the filtering type:
+                if( FLAG==0 ) % Gaussian
+                    pixel = sum(wis(:).*vals(:));
+                else % Rician
+                    pixel = sum(wis(:).*vals(:).*vals(:));
+                end
+            else
+                % LOOP IMPLEMENTATION (SEEMS TO BE FASTER):
+                pixel = 0.0;
+                NORM = 0.0;
+                for s=mx:MX
+                    for t=my:MY
+                        for u=mz:MZ
+                            % Get the current features:
+                            mui = mu(t,s,u);
+                            gxi = Gx(t,s,u);
+                            gyi = Gy(t,s,u);
+                            gzi = Gz(t,s,u);
+                            % Compute the distance and normalize:
+                            dist = (mu0-mui)*(mu0-mui) + ...
+                                (gx0-gxi)*(gx0-gxi)*factors(1) + ...
+                                (gy0-gyi)*(gy0-gyi)*factors(2) + ...
+                                (gz0-gzi)*(gz0-gzi)*factors(3);
+                            dist = dist/(h*h);
+                            % Compute the weight in case the distance is below
+                            % the pre-selection threshold, otherwise set to 0:
+                            if( dist<ps )
+                                dist = exp(-dist);
+                            else
+                                dist = 0.0;
+                            end
+                            % Avoid over-weighting of the central pixel:
+                            if( dist>0.367879441171442 )
+                                dist = 0.367879441171442;
+                            end
+                            % Add to the current value. Average the pixels or
+                            % their squared values depending on the filtering
+                            % type:
+                            if( FLAG==0 ) % Gaussian
+                                pixel = pixel + dist * V(t,s,u);
+                            else %Rician
+                                pixel = pixel + dist * V(t,s,u) * V(t,s,u);
+                            end
+                            % Store the normalization:
+                            NORM = NORM + dist;
+                        end
+                    end
+                end
+            end
+            % Normalize the pixel. If we are in the Rician case, we need
+            % also to remove the bias:
+            if( FLAG==0 ) % Gaussian
+                pixel = pixel/NORM;
+            else % Rician
+                pixel = sqrt(max(pixel/NORM-2*sigma*sigma,0));
+            end
+            % Set the output pixel:
+            out(y,x,z) = pixel;
+        end
+    end
+end
+return;
+
+%--------------------------------------------------------------------------
+function [mu,Gx,Gy,Gz,factors,hcorr] = ComputeLocalFeatures3D( I, radii )
+% Computes the local mean value and the local gradients of a 3D image.
+%
+% I:       the input image
+% radii:   a 3x1 vector of integers with the size of the neighborhood used
+%          to compute the local values. Gaussian windows are generated
+%          for each dimension as gausswin(2*radii(d)+1). If not
+%          provided, [x=1;y=1;z=1] will be assumed
+% mu:      A 3D image, the same size as I, with local mean.
+% Gx:      A 3D image, the same size as I, with the gradient in the 'x'
+%          direction (dimension 2 in matlab).
+% Gy:      A 3D image, the same size as I, with the gradient in the 'y'
+%          direction (dimension 1 in matlab).
+% Gz:      A 3D image, the same size as I, with the gradient in the 'z'
+%          direction (dimension 3 in matlab).
+% factors: a 3x1 vector with the factors to be applied to each gradient
+%          difference to estimate patch distances.
+% hcorr:   the effective reduction in the amount of noise in the
+%          distances between patches because of the fitting.
+
+I = double(I);
+
+% Check if the radii were provided:
+if( nargin<2 )
+    radii = [1;1;1];
+else
+    if( length(radii) ~= 3 )
+        radii = ones(3,1)*radii(1);
+    end
+end
+
+% Create the gaussian windows for each direction:
+gx = gausswin( 2*radii(1) + 1 ); gx = gx./sum(gx);
+gy = gausswin( 2*radii(2) + 1 ); gy = gy./sum(gy);
+gz = gausswin( 2*radii(3) + 1 ); gz = gz./sum(gz);
+
+% Compute the local mean:
+mu = My3DConv( I, gx, gy, gz );
+
+% Create the differentiation kernels:
+gdx = (-radii(1):radii(1))';
+gdx = (gdx.*gx)./sum(gdx.*gdx.*gx);
+gdy = (-radii(2):radii(2))';
+gdy = (gdy.*gy)./sum(gdy.*gdy.*gy);
+gdz = (-radii(3):radii(3))';
+gdz = (gdz.*gz)./sum(gdz.*gdz.*gz);
+
+% Create each gradient image (the minus sign is for consistency with the
+% implementation of matlab's 'gradient' function):
+Gx = -My3DConv( I, gdx, gy, gz );
+Gy = -My3DConv( I, gx, gdy, gz );
+Gz = -My3DConv( I, gx, gy, gdz );
+
+% Compute the scaling factors:
+factors(1) = sum( (-radii(1):radii(1)).*(-radii(1):radii(1)).*gx' );
+factors(2) = sum( (-radii(2):radii(2)).*(-radii(2):radii(2)).*gy' );
+factors(3) = sum( (-radii(3):radii(3)).*(-radii(3):radii(3)).*gz' );
+
+% Compute the correction in the h factor. First, compute the 'X' matrix:
+[x,y,z] = meshgrid( -radii(1):radii(1), ...
+    -radii(2):radii(2), ...
+    -radii(3):radii(3) );
+X = [ ones(size(x(:))), ...
+    x(:), y(:), z(:), ...
+    x(:).*x(:)/2, y(:).*y(:)/2, z(:).*z(:)/2, ...
+    x(:).*y(:), x(:).*z(:), y(:).*z(:) ];
+[g1,g2,g3] = meshgrid( gx, gy, gz );
+R = g1(:).*g2(:).*g3(:);
+hcorr = sqrt(trace(diag(R)*X*(X'*X)^(-1)*X'));
+return;
+
+%--------------------------------------------------------------------------
+function out = My3DConv( I, gx, gy, gz )
+% Computes a separable 3D convolution
+gx = gx(:);
+gx = permute(gx,[2,1,3]);
+gy = gy(:);
+gz = gz(:);
+gz = permute(gz,[3,2,1]);
+I = convn( I, gx, 'same' );
+I = convn( I, gy, 'same' );
+out = convn( I, gz, 'same' );
+return;
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3731cad
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+# thyroid-us
+
+Code repository for *Evaluation of Thyroid Nodules Seen on Ultrasound: Comparison of Deep Learning to Radiologists Using ACR TI-RADS*.
+
+It contains the multi-task CNN model definition, training, and inference scripts.
+
+![Multi-task CNN](./multitask.png)
+
+We used the Keras framework with a TensorFlow backend.
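As a quick orientation, below is a minimal inference sketch. It assumes a checkpoint saved by `train.py` under the path used in `test.py`, loads it with the `focal_loss_fixed` custom object exactly as `test.py` does, and uses a random array only as a stand-in for a preprocessed 160×160 nodule crop scaled to [-1, 1] as in `data.py`:

```
import numpy as np
from keras.models import load_model

from focal_loss import focal_loss

# Checkpoint path follows train.py/test.py; adjust to your setup.
net = load_model(
    "/data/test/checkpoints/weights.h5",
    custom_objects={"focal_loss_fixed": focal_loss()},
)

# Stand-in for a cropped nodule: 160x160 grayscale scaled to [-1, 1] (see data.py).
image = np.random.rand(160, 160, 1).astype(np.float32)
x = (image - 0.5) * 2.0

outputs = net.predict(np.expand_dims(x, axis=0))
print("malignancy score: {:.3f}".format(outputs[0][0, 0]))  # first output head
```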
+ +If you use this code in your research, consider citing: + +``` +@article{buda2019evaluation, + title={Evaluation of Thyroid Nodules Seen on Ultrasound: Comparison of Deep Learning to Radiologists Using ACR TI-RADS}, + author={Buda, Mateusz and Wildman-Tobriner, Benjamin and Hoang, Jenny K and Thayer, David and Tessler, Franklin N and Middleton, William D and Mazurowski, Maciej A}, + journal={Radiology}, + year={2019}, + publisher={Radiological Society of North America} +} +``` + +ROC curves comparing our method to radiologists for evaluating malignancy on the test set of 99 cases: + +![Test ROC](./roc_test.png) diff --git a/crop_images.m b/crop_images.m new file mode 100644 index 0000000..f346b7d --- /dev/null +++ b/crop_images.m @@ -0,0 +1,123 @@ +function [ ] = crop_images( images_regex, cals_path, target_dim ) +%CROP_IMAGES Crops nodules to square bounding box defined by callipers and +%resizes them to given dimensions. + +if nargin < 1 + images_regex = '/data/images-cv/*.PNG'; +end +if nargin < 2 + cals_path = '/data/detection/Calipers-cv'; +end +if nargin < 3 + target_dim = 160; +end + +margin = 32; + +images_dir = dir(images_regex); + +for i = 1:numel(images_dir) + + img_path = fullfile(images_dir(i).folder, images_dir(i).name); + cal_filename = strrep(images_dir(i).name, 'PNG', 'csv'); + cal_path = fullfile(cals_path, cal_filename); + + cal = csvread(cal_path); + cal = cal(:, 1:2); + + image = rgbread(img_path); + + image = medfilt3(image); + + image = crop2bbox(image, cal, margin); + + image = pad2square(image); + + image = imresize(image, [target_dim target_dim]); + + lims = [0.01 0.99]; + image = imadjust(image, stretchlim(image, lims), []); + + image = FastNonLocalMeans3D(double(image)/255, 0.1); + image = uint8(image * 255); + + imwrite(image, img_path); + +end + +end + + +function [ padded ] = pad2square( img ) + + if size(img, 1) == size(img, 2) + padded = img; + return; + end + + if size(img, 1) < size(img, 2) + ypad_post = ceil((size(img, 2) - size(img, 1)) / 2.0); + ypad_pre = floor((size(img, 2) - size(img, 1)) / 2.0); + xpad_post = 0; + xpad_pre = 0; + else + xpad_post = ceil((size(img, 1) - size(img, 2)) / 2.0); + xpad_pre = floor((size(img, 1) - size(img, 2)) / 2.0); + ypad_post = 0; + ypad_pre = 0; + end + + padded = padarray(img, [ypad_post xpad_post], 0, 'post'); + padded = padarray(padded, [ypad_pre xpad_pre], 0, 'pre'); + +end + +function [ cropped ] = crop2bbox( img, cals, margin ) + + height = size(img, 1); + width = size(img, 2); + + if size(cals, 1) <= 2 + center = [min(cals(:, 1)) + abs(cals(1, 1) - cals(2, 1)) / 2; ... 
+                  min(cals(:, 2)) + abs(cals(1, 2) - cals(2, 2)) / 2];
+        R = [cosd(90) -sind(90); sind(90) cosd(90)];
+        cals = padarray(cals, [2 0], 1, 'post');
+        cals(3, 1:2) = (R * (cals(1, 1:2)' - center) + center)';
+        cals(4, 1:2) = (R * (cals(2, 1:2)' - center) + center)';
+    end
+
+    ymin = max(1, min(cals(:, 1)) - margin);
+    ymax = min(max(cals(:, 1)) + margin, height);
+    xmin = max(1, min(cals(:, 2)) - margin);
+    xmax = min(max(cals(:, 2)) + margin, width);
+
+    ymid = (ymax + ymin) / 2;
+    xmid = (xmax + xmin) / 2;
+
+    box_size = max((ymax - ymin), (xmax - xmin)) / 2;
+    box_size = max(box_size, 80);
+
+    ymin = round(max(1, ymid - box_size));
+    ymax = round(min(ymid + box_size, height));
+    xmin = round(max(1, xmid - box_size));
+    xmax = round(min(xmid + box_size, width));
+
+    cropped = img(ymin:ymax, xmin:xmax);
+
+end
+
+function [ rgb ] = rgbread( img_path )
+%RGBREAD Reads image from given path and transforms it to RGB image if
+%needed
+
+[img, map] = imread(img_path);
+
+if ~isempty(map)
+    rgb = ind2rgb(img, map);
+else
+    if size(img, 3) == 1
+        rgb = cat(3, img, img, img);
+    else
+        rgb = img;
+    end
+end
+
+end
diff --git a/data.py b/data.py
new file mode 100644
index 0000000..ff93b28
--- /dev/null
+++ b/data.py
@@ -0,0 +1,414 @@
+import numpy as np
+import os
+import pandas as pd
+from glob import glob
+from imgaug import augmenters
+from random import seed, randint
+from scipy.misc import imread
+
+data_path = "./data.csv"
+images_dir = "/data/images-cv"
+test_images_dir = "/data/images-test"
+
+random_seed = 3
+total_folds = 10
+
+
+def feature_classes(feature):
+    df = pd.read_csv(data_path)
+    df.fillna(0, inplace=True)
+    df.Calcs1.replace(0, "None", inplace=True)
+    if feature == "composition":
+        return list(pd.get_dummies(df.Composition, prefix="", prefix_sep="").columns)
+    if feature == "echogenicity":
+        return list(pd.get_dummies(df.Echogenicity, prefix="", prefix_sep="").columns)
+    if feature == "shape":
+        return ["wider", "taller"]
+    if feature == "calcification":
+        return list(pd.get_dummies(df.Calcs1, prefix="", prefix_sep="").columns)
+    if feature == "margin":
+        return list(pd.get_dummies(df.MargA, prefix="", prefix_sep="").columns)
+    return []
+
+
+def fold_pids(fold, test=True):
+    # get patient IDs for the held-out (validation) split of the given fold
+    # in 10-fold cross-validation; with test=False, the training IDs are returned
+    df = pd.read_csv(data_path)
+    all_files = glob(os.path.join(images_dir, "*.PNG"))
+    val_ids = validation_ids(fold, df[["ID", "Cancer"]])
+    pids = []
+    for f_path in all_files:
+        pid = fname2pid(f_path)
+        if (test and pid in val_ids) or (not test and pid not in val_ids):
+            pids.append(pid)
+    return pids
+
+
+def test_pids():
+    # get patient IDs for test cases
+    test_files = sorted(glob(os.path.join(test_images_dir, "*.PNG")))
+    pids = []
+    for f_path in test_files:
+        pids.append(fname2pid(f_path))
+    return pids
+
+
+def train_pids():
+    # get patient IDs for training cases
+    train_files = sorted(glob(images_dir + "/*.PNG"))
+    pids = []
+    for f_path in train_files:
+        pids.append(fname2pid(f_path))
+    return pids
+
+
+def train_data():
+    # get images and labels for training cases
+    df = pd.read_csv(data_path)
+    df.fillna(0, inplace=True)
+    df.Calcs1.replace(0, "None", inplace=True)
+
+    df_cancer = df[["ID", "Cancer"]]
+    df_compos = pd.concat([df.ID, pd.get_dummies(df.Composition)], axis=1)
+    df_echo = pd.concat([df.ID, pd.get_dummies(df.Echogenicity)], axis=1)
+    df_shape = df[["ID", "Shape"]]
+    df_shape["Shape"] = df_shape.apply(lambda row: 1 if row.Shape == "y" else 0, axis=1)
+    df_calcs = pd.concat([df.ID,
pd.get_dummies(df.Calcs1)], axis=1) + df_margin = pd.concat([df.ID, pd.get_dummies(df.MargA)], axis=1) + + train_files = sorted(glob(os.path.join(images_dir, "*.PNG"))) + X_train = [] + + # labels for malignancy and 5 TI-RADS features + y_train_cancer = [] + y_train_compos = [] + y_train_echo = [] + y_train_shape = [] + y_train_calcs = [] + y_train_margin = [] + + for f_path in train_files: + pid = fname2pid(f_path) + X_train.append( + np.expand_dims( + np.array(imread(f_path, flatten=False, mode="F")).astype(np.float32), + axis=-1, + ) + ) + y_train_cancer.append( + df_cancer[df_cancer.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_train_compos.append( + df_compos[df_compos.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_train_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + # for shape feature, only assign positive label to transversal view + if "trans" in f_path: + y_train_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_train_shape.append(np.array([0]).astype(np.float32)) + y_train_calcs.append( + df_calcs[df_calcs.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_train_margin.append( + df_margin[df_margin.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + + X_train = np.array(X_train) + + # normalize + X_train /= 255. + X_train -= 0.5 + X_train *= 2. + + y_train = { + "out_cancer": np.array(y_train_cancer), + "out_compos": np.array(y_train_compos), + "out_echo": np.array(y_train_echo), + "out_shape": np.array(y_train_shape), + "out_calcs": np.array(y_train_calcs), + "out_margin": np.array(y_train_margin), + } + + return X_train, y_train + + +def test_data(): + # get images and labels for test cases + df = pd.read_csv(data_path) + df.fillna(0, inplace=True) + df.Calcs1.replace(0, "None", inplace=True) + + df_cancer = df[["ID", "Cancer"]] + df_compos = pd.concat([df.ID, pd.get_dummies(df.Composition)], axis=1) + df_echo = pd.concat([df.ID, pd.get_dummies(df.Echogenicity)], axis=1) + df_shape = df[["ID", "Shape"]] + df_shape["Shape"] = df_shape.apply(lambda row: 1 if row.Shape == "y" else 0, axis=1) + df_calcs = pd.concat([df.ID, pd.get_dummies(df.Calcs1)], axis=1) + df_margin = pd.concat([df.ID, pd.get_dummies(df.MargA)], axis=1) + + test_files = sorted(glob(test_images_dir + "/*.PNG")) + + X_test = [] + + # labels for malignancy and 5 TI-RADS features + y_test_cancer = [] + y_test_compos = [] + y_test_echo = [] + y_test_shape = [] + y_test_calcs = [] + y_test_margin = [] + + for f_path in test_files: + pid = fname2pid(f_path) + X_test.append( + np.expand_dims( + np.array(imread(f_path, flatten=False, mode="F")).astype(np.float32), + axis=-1, + ) + ) + y_test_cancer.append( + df_cancer[df_cancer.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_test_compos.append( + df_compos[df_compos.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_test_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + if "trans" in f_path: + y_test_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_test_shape.append(np.array([0]).astype(np.float32)) + y_test_calcs.append( + df_calcs[df_calcs.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + y_test_margin.append( + df_margin[df_margin.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + + X_test = np.array(X_test) + + # normalize + X_test /= 255. 
+ X_test -= 0.5 + X_test *= 2. + + y_test = [ + np.array(y_test_cancer), + np.array(y_test_compos), + np.array(y_test_echo), + np.array(y_test_shape), + np.array(y_test_calcs), + np.array(y_test_margin), + ] + + return X_test, y_test + + +def fold_data(fold): + # get images and labels for given fold + df = pd.read_csv(data_path) + df.fillna(0, inplace=True) + df.Calcs1.replace(0, "None", inplace=True) + + df_cancer = df[["ID", "Cancer"]] + df_compos = pd.concat([df.ID, pd.get_dummies(df.Composition)], axis=1) + df_echo = pd.concat([df.ID, pd.get_dummies(df.Echogenicity)], axis=1) + df_shape = df[["ID", "Shape"]] + df_shape["Shape"] = df_shape.apply(lambda row: 1 if row.Shape == "y" else 0, axis=1) + df_calcs = pd.concat([df.ID, pd.get_dummies(df.Calcs1)], axis=1) + df_margin = pd.concat([df.ID, pd.get_dummies(df.MargA)], axis=1) + + all_files = glob(images_dir + "/*.PNG") + val_ids = validation_ids(fold, df_cancer) + + X_train = [] + X_test = [] + + # labels for malignancy and 5 TI-RADS features + y_train_cancer = [] + y_train_compos = [] + y_train_echo = [] + y_train_shape = [] + y_train_calcs = [] + y_train_margin = [] + y_test_cancer = [] + y_test_compos = [] + y_test_echo = [] + y_test_shape = [] + y_test_calcs = [] + y_test_margin = [] + + for f_path in all_files: + pid = fname2pid(f_path) + image = np.expand_dims( + np.array(imread(f_path, flatten=False, mode="F")).astype(np.float32), + axis=-1, + ) + if pid in val_ids: + X_test.append(image) + y_test_cancer.append( + df_cancer[df_cancer.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_test_compos.append( + df_compos[df_compos.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_test_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + if "trans" in f_path: + y_test_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_test_shape.append(np.array([0]).astype(np.float32)) + y_test_calcs.append( + df_calcs[df_calcs.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_test_margin.append( + df_margin[df_margin.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + X_train.append(image) + y_train_cancer.append( + df_cancer[df_cancer.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_train_compos.append( + df_compos[df_compos.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_train_echo.append( + df_echo[df_echo.ID == pid].as_matrix().flatten()[1:].astype(np.float32) + ) + if "trans" in f_path: + y_train_shape.append( + df_shape[df_shape.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + else: + y_train_shape.append(np.array([0]).astype(np.float32)) + y_train_calcs.append( + df_calcs[df_calcs.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + y_train_margin.append( + df_margin[df_margin.ID == pid] + .as_matrix() + .flatten()[1:] + .astype(np.float32) + ) + + X_train = np.array(X_train) + X_test = np.array(X_test) + + # normalize + X_train /= 255. + X_train -= 0.5 + X_train *= 2. + X_test /= 255. + X_test -= 0.5 + X_test *= 2. 
+ + y_train = { + "out_cancer": np.array(y_train_cancer), + "out_compos": np.array(y_train_compos), + "out_echo": np.array(y_train_echo), + "out_shape": np.array(y_train_shape), + "out_calcs": np.array(y_train_calcs), + "out_margin": np.array(y_train_margin), + } + + y_test = [ + np.array(y_test_cancer), + np.array(y_test_compos), + np.array(y_test_echo), + np.array(y_test_shape), + np.array(y_test_calcs), + np.array(y_test_margin), + ] + + return X_train, y_train, X_test, y_test + + +def augment(X): + # data augmentation + seq = augmenters.Sequential( + [ + augmenters.Fliplr(0.5), + augmenters.Flipud(0.5), + augmenters.Affine(rotate=(-15, 15)), + augmenters.Affine(shear=(-15, 15)), + augmenters.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}), + augmenters.Affine(scale=(0.9, 1.1)), + ] + ) + return seq.augment_images(X) + + +def validation_ids(fold, df_cancer): + # get patient IDs in given fold + pid_set = set() + all_files = glob(images_dir + "/*.PNG") + for f_path in all_files: + pid = fname2pid(f_path) + pid_set.add(pid) + + val_ids = [] + + # set random seed to get the same split every time + seed(random_seed) + # stratified split + malignant_fold = 0 + for pid in sorted(pid_set): + label = df_cancer[df_cancer.ID == pid].as_matrix().flatten()[1:] + if label == 1: + if fold == np.mod(malignant_fold, total_folds): + val_ids.append(pid) + malignant_fold += 1 + else: + if fold == randint(0, total_folds - 1): + val_ids.append(pid) + + return val_ids + + +def fname2pid(fname): + # get patient ID from image file name + return fname.split("/")[-1].split(".")[0].lstrip("0") diff --git a/focal_loss.py b/focal_loss.py new file mode 100644 index 0000000..064f4ae --- /dev/null +++ b/focal_loss.py @@ -0,0 +1,11 @@ +import tensorflow as tf +from keras import backend as K + + +def focal_loss(gamma=2, alpha=2): + + def focal_loss_fixed(y_true, y_pred): + pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred) + return -K.sum(alpha * K.pow(1. 
- pt, gamma) * K.log(pt + 1e-6), axis=-1) + + return focal_loss_fixed diff --git a/model.py b/model.py new file mode 100644 index 0000000..33adae3 --- /dev/null +++ b/model.py @@ -0,0 +1,113 @@ +from keras.initializers import Constant +from keras.layers import Input, Conv2D, Flatten, Activation, MaxPool2D, Dropout +from keras.models import Model + +from focal_loss import focal_loss + +img_width, img_height = 160, 160 + +loss_dict = { + "out_cancer": focal_loss(), + "out_compos": focal_loss(), + "out_echo": focal_loss(), + "out_shape": focal_loss(), + "out_calcs": focal_loss(), + "out_margin": focal_loss(), +} + +loss_weights_dict = { + "out_cancer": 1.0, + "out_compos": 1.0, + "out_echo": 1.0, + "out_shape": 1.0, + "out_calcs": 1.0, + "out_margin": 1.0, +} + + +def multitask_cnn(): + # 160x160x1 + input_tensor = Input(shape=(img_height, img_width, 1), name="thyroid_input") + # 160x160x8 + x = Conv2D(8, (3, 3), padding="same", activation="relu")(input_tensor) + # 80x80x8 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 80x80x12 + x = Conv2D(12, (3, 3), padding="same", activation="relu")(x) + # 40x40x12 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 40x40x16 + x = Conv2D(16, (3, 3), padding="same", activation="relu")(x) + # 20x20x16 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 20x20x24 + x = Conv2D(24, (3, 3), padding="same", activation="relu")(x) + # 10x10x24 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 10x10x32 + x = Conv2D(32, (3, 3), padding="same", activation="relu")(x) + # 5x5x32 + x = MaxPool2D((2, 2), strides=(2, 2))(x) + # 5x5x48 + x = Conv2D(48, (3, 3), padding="same", activation="relu")(x) + # 5x5x48 + x = Dropout(0.5)(x) + + y_cancer = Conv2D( + filters=1, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_cancer = Flatten()(y_cancer) + y_cancer = Activation("sigmoid", name="out_cancer")(y_cancer) + + y_compos = Conv2D( + filters=5, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_compos = Flatten()(y_compos) + y_compos = Activation("softmax", name="out_compos")(y_compos) + + y_echo = Conv2D( + filters=5, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_echo = Flatten()(y_echo) + y_echo = Activation("softmax", name="out_echo")(y_echo) + + y_shape = Conv2D( + filters=1, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_shape = Flatten()(y_shape) + y_shape = Activation("sigmoid", name="out_shape")(y_shape) + + y_calcs = Conv2D( + filters=5, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_calcs = Flatten()(y_calcs) + y_calcs = Activation("softmax", name="out_calcs")(y_calcs) + + y_margin = Conv2D( + filters=4, + kernel_size=(5, 5), + kernel_initializer="glorot_normal", + bias_initializer=Constant(value=-0.9), + )(x) + y_margin = Flatten()(y_margin) + y_margin = Activation("softmax", name="out_margin")(y_margin) + + return Model( + inputs=[input_tensor], + outputs=[y_cancer, y_compos, y_echo, y_shape, y_calcs, y_margin], + ) diff --git a/multitask.png b/multitask.png new file mode 100644 index 0000000..3af6df9 Binary files /dev/null and b/multitask.png differ diff --git a/plots.py b/plots.py new file mode 100644 index 0000000..3afd6f2 --- /dev/null +++ b/plots.py @@ -0,0 +1,24 @@ +import matplotlib.pyplot as plt +from sklearn.metrics import roc_auc_score, roc_curve + + +def 
plot_roc(y_true, y_pred, figname="roc.png"):
+    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
+    roc_auc = roc_auc_score(y_true, y_pred)
+
+    print("roc auc = {}".format(roc_auc))
+
+    plt.rcParams.update({"font.size": 24})
+
+    fig = plt.figure(figsize=(10, 10))
+    plt.plot(fpr, tpr, color="blue", lw=2, label="ROC curve (area = %0.2f)" % roc_auc)
+    plt.plot([0, 1], [0, 1], color="gray", lw=1, linestyle="--")
+    plt.grid(color="silver", alpha=0.3, linestyle="--", linewidth=1)
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.0])
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    plt.legend(loc="lower right")
+    plt.tight_layout()
+    plt.savefig(figname, bbox_inches="tight")
+    plt.close(fig)
diff --git a/roc_test.png b/roc_test.png
new file mode 100644
index 0000000..440fe5c
Binary files /dev/null and b/roc_test.png differ
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..3fc0d83
--- /dev/null
+++ b/test.py
@@ -0,0 +1,73 @@
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import sys
+import tensorflow as tf
+from keras import backend as K
+from keras.models import load_model
+from sklearn.metrics import roc_auc_score, roc_curve
+
+from data import test_data, test_pids
+from focal_loss import focal_loss
+from plots import plot_roc
+
+checkpoints_dir = "/data/test/checkpoints/"
+batch_size = 128
+nb_categories = 1
+
+
+def predict():
+    weights_path = os.path.join(checkpoints_dir, "weights.h5")
+
+    net = load_model(weights_path, custom_objects={"focal_loss_fixed": focal_loss()})
+
+    X_test, y_test = test_data()
+
+    preds = net.predict(X_test, batch_size=batch_size, verbose=1)
+
+    return preds[0], y_test[0]
+
+
+def test():
+    predictions, targets = predict()
+
+    cases_predictions = {}
+    cases_targets = {}
+    pids = test_pids()
+    for i in range(len(pids)):
+        pid = pids[i]
+        prev_pred = cases_predictions.get(pid, np.zeros(nb_categories))
+        preds = predictions[i]
+        cases_predictions[pid] = prev_pred + preds
+        cases_targets[pid] = targets[i]
+
+    y_pred = []
+    y_true = []
+    y_id = []
+    for pid in cases_predictions:
+        y_pred.append(cases_predictions[pid][0])
+        y_true.append(cases_targets[pid])
+        y_id.append(pid)
+
+    with open("./predictions_test.csv", "w") as csvfile:
+        csvwriter = csv.writer(csvfile)
+        csvwriter.writerow(["ID", "Prediction", "Cancer"])
+        for pid, prediction, gt in zip(y_id, y_pred, y_true):
+            pid = pid.lstrip("0")
+            csvwriter.writerow([pid, prediction, gt[0]])
+
+    plot_roc(y_true, y_pred, figname="roc_test.png")
+
+
+if __name__ == "__main__":
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.allow_soft_placement = True
+    sess = tf.Session(config=config)
+    K.set_session(sess)
+
+    device = "/gpu:" + sys.argv[1]
+    with tf.device(device):
+        test()
diff --git a/test_cv.py b/test_cv.py
new file mode 100644
index 0000000..09ac27c
--- /dev/null
+++ b/test_cv.py
@@ -0,0 +1,93 @@
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import sys
+import tensorflow as tf
+from keras import backend as K
+from keras.models import load_model
+from sklearn.metrics import roc_auc_score, roc_curve
+
+from data import fold_data, fold_pids
+from focal_loss import focal_loss
+from plots import plot_roc
+
+checkpoints_dir = "/data/checkpoints/<fold>/"
+weights_file = "weights.h5"
+batch_size = 128
+nb_categories = 1
+
+
+def predict(fold):
+    fold_checkpoints_dir = checkpoints_dir.replace("<fold>", str(fold))
+    weights_path = os.path.join(fold_checkpoints_dir, weights_file)
+
+    net = load_model(weights_path,
custom_objects={"focal_loss_fixed": focal_loss()}) + + x_train, y_train, x_test, y_test = fold_data(fold) + + preds = net.predict(x_test, batch_size=batch_size, verbose=1) + y = y_test[0] + + return preds[0], y + + +def test(folds=10): + pids = [] + predictions = np.zeros((0, nb_categories)) + targets = [] + pid_fold = [] + + for f in range(folds): + preds, t = predict(f) + predictions = np.vstack((predictions, preds)) + pids.extend(fold_pids(f)) + targets.extend(t) + pid_fold.extend([f] * len(t)) + + print("{} images".format(len(pids))) + + cases_predictions = {} + cases_targets = {} + cases_folds = {} + for i in range(len(pids)): + pid = pids[i] + prev_pred = cases_predictions.get(pid, np.zeros(nb_categories)) + preds = predictions[i] + cases_predictions[pid] = prev_pred + preds + cases_targets[pid] = targets[i] + cases_folds[pid] = pid_fold[i] + + print("{} cases".format(len(cases_predictions))) + + y_pred = [] + y_true = [] + y_id = [] + y_fold = [] + for pid in cases_predictions: + y_pred.append(cases_predictions[pid][0]) + y_true.append(cases_targets[pid]) + y_id.append(pid) + y_fold.append(cases_folds[pid]) + + with open("./predictions_cv.csv", "w") as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(["ID", "Prediction", "Cancer", "Fold"]) + for pid, prediction, gt, f in zip(y_id, y_pred, y_true, y_fold): + pid = pid.lstrip("0") + csvwriter.writerow([pid, prediction, gt[0], f]) + + plot_roc(y_true, y_pred, figname="roc_cv.png") + + +if __name__ == "__main__": + + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + config.allow_soft_placement = True + sess = tf.Session(config=config) + K.set_session(sess) + + device = "/gpu:" + sys.argv[1] + with tf.device(device): + test() diff --git a/train.py b/train.py new file mode 100644 index 0000000..d656be0 --- /dev/null +++ b/train.py @@ -0,0 +1,82 @@ +import numpy as np +import os +import sys +import tensorflow as tf +from keras import backend as K +from keras.callbacks import TensorBoard +from keras.optimizers import RMSprop +from sklearn.metrics import roc_auc_score + +from data import augment, train_data, test_data +from model import multitask_cnn, loss_dict, loss_weights_dict + +checkpoints_dir = "/data/test/checkpoints/" +logs_dir = "/data/test/logs/" + +batch_size = 128 +epochs = 250 +base_lr = 0.001 + + +def train(): + if not os.path.exists(checkpoints_dir): + os.makedirs(checkpoints_dir) + if not os.path.exists(logs_dir): + os.makedirs(logs_dir) + + X_train, y_train = train_data() + X_test, y_test = test_data() + + print("Training and validation data processed.") + + model = multitask_cnn() + + optimizer = RMSprop(lr=base_lr) + model.compile( + optimizer=optimizer, + loss=loss_dict, + loss_weights=loss_weights_dict, + metrics=["accuracy"], + ) + + training_log = TensorBoard(log_dir=os.path.join(logs_dir, "log"), write_graph=False) + + callbacks = [training_log] + + for e in range(epochs): + X_train_augmented = augment(X_train) + model.fit( + {"thyroid_input": X_train_augmented}, + y_train, + validation_data=(X_test, y_test), + batch_size=batch_size, + epochs=e + 1, + initial_epoch=e, + shuffle=True, + callbacks=callbacks, + ) + + if np.mod(e + 1, 10) == 0: + y_pred = model.predict(X_train, batch_size=batch_size, verbose=1) + auc_train = roc_auc_score(y_train["out_cancer"], y_pred[0]) + y_pred = model.predict(X_test, batch_size=batch_size, verbose=1) + auc_test = roc_auc_score(y_test[0], y_pred[0]) + with open(os.path.join(logs_dir, "auc.txt"), "a") as auc_file: + 
                auc_file.write("{},{}\n".format(auc_train, auc_test))
+
+    model.save(os.path.join(checkpoints_dir, "weights.h5"))
+
+    print("Training completed.")
+
+
+if __name__ == "__main__":
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.allow_soft_placement = True
+    sess = tf.Session(config=config)
+    K.set_session(sess)
+
+    device = "/gpu:" + sys.argv[1]
+    with tf.device(device):
+        train()
diff --git a/train_cv.py b/train_cv.py
new file mode 100644
index 0000000..cb64587
--- /dev/null
+++ b/train_cv.py
@@ -0,0 +1,92 @@
+import numpy as np
+import os
+import sys
+import tensorflow as tf
+from keras import backend as K
+from keras.callbacks import TensorBoard
+from keras.optimizers import RMSprop
+from sklearn.metrics import roc_auc_score
+
+from data import fold_data, augment
+from model import multitask_cnn, loss_dict, loss_weights_dict
+
+checkpoints_dir = "/data/checkpoints/<fold>/"
+logs_dir = "/data/logs/<fold>/"
+
+batch_size = 128
+epochs = 250
+base_lr = 0.001
+
+
+def train(fold):
+    fold_checkpoints_dir = checkpoints_dir.replace("<fold>", str(fold))
+    fold_logs_dir = logs_dir.replace("<fold>", str(fold))
+
+    if not os.path.exists(fold_checkpoints_dir):
+        os.makedirs(fold_checkpoints_dir)
+    if not os.path.exists(fold_logs_dir):
+        os.makedirs(fold_logs_dir)
+
+    x_train, y_train, x_test, y_test = fold_data(fold)
+
+    print("Training and validation data processed.")
+    print("Training data size: {}".format(len(x_train)))
+    print("Test data size: {}".format(len(x_test)))
+
+    model = multitask_cnn()
+
+    optimizer = RMSprop(lr=base_lr)
+
+    model.compile(
+        optimizer=optimizer,
+        loss=loss_dict,
+        loss_weights=loss_weights_dict,
+        metrics=["accuracy"],
+    )
+
+    training_log = TensorBoard(
+        log_dir=os.path.join(fold_logs_dir, "log"), write_graph=False
+    )
+
+    callbacks = [training_log]
+
+    y_train_cancer = y_train["out_cancer"]
+    y_test_cancer = y_test[0]
+
+    for e in range(epochs):
+        x_train_augmented = augment(x_train)
+        model.fit(
+            x={"thyroid_input": x_train_augmented},
+            y=y_train,
+            validation_data=(x_test, y_test),
+            batch_size=batch_size,
+            epochs=e + 1,
+            initial_epoch=e,
+            shuffle=True,
+            callbacks=callbacks,
+        )
+
+        if np.mod(e + 1, 10) == 0:
+            y_pred = model.predict(x_train, batch_size=batch_size, verbose=1)
+            auc_train = roc_auc_score(y_train_cancer, y_pred[0])
+            y_pred = model.predict(x_test, batch_size=batch_size, verbose=1)
+            auc_test = roc_auc_score(y_test_cancer, y_pred[0])
+            with open(os.path.join(fold_logs_dir, "auc.txt"), "a") as auc_file:
+                auc_file.write("{},{}\n".format(auc_train, auc_test))
+
+    model.save(os.path.join(fold_checkpoints_dir, "weights.h5"))
+
+    print("Training fold {} completed.".format(fold))
+
+
+if __name__ == "__main__":
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    config.allow_soft_placement = True
+    sess = tf.Session(config=config)
+    K.set_session(sess)
+
+    device = "/gpu:" + sys.argv[1]
+    with tf.device(device):
+        train(int(sys.argv[2]))
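For reference, both `test.py` and `test_cv.py` above reduce image-level predictions to case-level scores by summing the predictions of all images that share a patient ID before computing the ROC. A condensed, self-contained sketch of that aggregation follows (the function name and toy inputs are illustrative only; the logic mirrors the scripts above):

```
import numpy as np
from sklearn.metrics import roc_auc_score


def case_level_auc(pids, image_preds, image_labels):
    # pids: patient ID per image; image_preds/image_labels: one value per image.
    # Predictions from multiple images of the same nodule are summed per case.
    case_preds, case_labels = {}, {}
    for pid, pred, label in zip(pids, image_preds, image_labels):
        case_preds[pid] = case_preds.get(pid, 0.0) + float(pred)
        case_labels[pid] = float(label)
    ids = sorted(case_preds)
    y_score = [case_preds[pid] for pid in ids]
    y_true = [case_labels[pid] for pid in ids]
    return roc_auc_score(y_true, y_score)


# Illustrative toy call with two images per case:
print(case_level_auc(["1", "1", "2", "2"], [0.7, 0.6, 0.2, 0.3], [1, 1, 0, 0]))
```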