Readme.html

<!DOCTYPE html><html lang="en"><head>
      <title>Readme</title>
      <meta charset="utf-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
      
      <link rel="stylesheet" href="file:////home/users/astar/gis/pangmy/.vscode-server/extensions/shd101wyy.markdown-preview-enhanced-0.8.11/crossnote/dependencies/katex/katex.min.css">
      
      
      
      
      
      <style>
      code[class*=language-],pre[class*=language-]{color:#333;background:0 0;font-family:Consolas,"Liberation Mono",Menlo,Courier,monospace;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.4;-moz-tab-size:8;-o-tab-size:8;tab-size:8;-webkit-hyphens:none;-moz-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]{padding:.8em;overflow:auto;border-radius:3px;background:#f5f5f5}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal;background:#f5f5f5}.token.blockquote,.token.comment{color:#969896}.token.cdata{color:#183691}.token.doctype,.token.macro.property,.token.punctuation,.token.variable{color:#333}.token.builtin,.token.important,.token.keyword,.token.operator,.token.rule{color:#a71d5d}.token.attr-value,.token.regex,.token.string,.token.url{color:#183691}.token.atrule,.token.boolean,.token.code,.token.command,.token.constant,.token.entity,.token.number,.token.property,.token.symbol{color:#0086b3}.token.prolog,.token.selector,.token.tag{color:#63a35c}.token.attr-name,.token.class,.token.class-name,.token.function,.token.id,.token.namespace,.token.pseudo-class,.token.pseudo-element,.token.url-reference .token.variable{color:#795da3}.token.entity{cursor:help}.token.title,.token.title .token.punctuation{font-weight:700;color:#1d3e81}.token.list{color:#ed6a43}.token.inserted{background-color:#eaffea;color:#55a532}.token.deleted{background-color:#ffecec;color:#bd2c00}.token.bold{font-weight:700}.token.italic{font-style:italic}.language-json .token.property{color:#183691}.language-markup .token.tag .token.punctuation{color:#333}.language-css .token.function,code.language-css{color:#0086b3}.language-yaml .token.atrule{color:#63a35c}code.language-yaml{color:#183691}.language-ruby .token.function{color:#333}.language-markdown .token.url{color:#795da3}.language-makefile .token.symbol{color:#795da3}.language-makefile .token.variable{color:#183691}.language-makefile 
.token.builtin{color:#0086b3}.language-bash .token.keyword{color:#0086b3}pre[data-line]{position:relative;padding:1em 0 1em 3em}pre[data-line] .line-highlight-wrapper{position:absolute;top:0;left:0;background-color:transparent;display:block;width:100%}pre[data-line] .line-highlight{position:absolute;left:0;right:0;padding:inherit 0;margin-top:1em;background:hsla(24,20%,50%,.08);background:linear-gradient(to right,hsla(24,20%,50%,.1) 70%,hsla(24,20%,50%,0));pointer-events:none;line-height:inherit;white-space:pre}pre[data-line] .line-highlight:before,pre[data-line] .line-highlight[data-end]:after{content:attr(data-start);position:absolute;top:.4em;left:.6em;min-width:1em;padding:0 .5em;background-color:hsla(24,20%,50%,.4);color:#f4f1ef;font:bold 65%/1.5 sans-serif;text-align:center;vertical-align:.3em;border-radius:999px;text-shadow:none;box-shadow:0 1px #fff}pre[data-line] .line-highlight[data-end]:after{content:attr(data-end);top:auto;bottom:.4em}html body{font-family:'Helvetica Neue',Helvetica,'Segoe UI',Arial,freesans,sans-serif;font-size:16px;line-height:1.6;color:#333;background-color:#fff;overflow:initial;box-sizing:border-box;word-wrap:break-word}html body>:first-child{margin-top:0}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{line-height:1.2;margin-top:1em;margin-bottom:16px;color:#000}html body h1{font-size:2.25em;font-weight:300;padding-bottom:.3em}html body h2{font-size:1.75em;font-weight:400;padding-bottom:.3em}html body h3{font-size:1.5em;font-weight:500}html body h4{font-size:1.25em;font-weight:600}html body h5{font-size:1.1em;font-weight:600}html body h6{font-size:1em;font-weight:600}html body h1,html body h2,html body h3,html body h4,html body h5{font-weight:600}html body h5{font-size:1em}html body h6{color:#5c5c5c}html body strong{color:#000}html body del{color:#5c5c5c}html body a:not([href]){color:inherit;text-decoration:none}html body a{color:#08c;text-decoration:none}html body 
a:hover{color:#00a3f5;text-decoration:none}html body img{max-width:100%}html body>p{margin-top:0;margin-bottom:16px;word-wrap:break-word}html body>ol,html body>ul{margin-bottom:16px}html body ol,html body ul{padding-left:2em}html body ol.no-list,html body ul.no-list{padding:0;list-style-type:none}html body ol ol,html body ol ul,html body ul ol,html body ul ul{margin-top:0;margin-bottom:0}html body li{margin-bottom:0}html body li.task-list-item{list-style:none}html body li>p{margin-top:0;margin-bottom:0}html body .task-list-item-checkbox{margin:0 .2em .25em -1.8em;vertical-align:middle}html body .task-list-item-checkbox:hover{cursor:pointer}html body blockquote{margin:16px 0;font-size:inherit;padding:0 15px;color:#5c5c5c;background-color:#f0f0f0;border-left:4px solid #d6d6d6}html body blockquote>:first-child{margin-top:0}html body blockquote>:last-child{margin-bottom:0}html body hr{height:4px;margin:32px 0;background-color:#d6d6d6;border:0 none}html body table{margin:10px 0 15px 0;border-collapse:collapse;border-spacing:0;display:block;width:100%;overflow:auto;word-break:normal;word-break:keep-all}html body table th{font-weight:700;color:#000}html body table td,html body table th{border:1px solid #d6d6d6;padding:6px 13px}html body dl{padding:0}html body dl dt{padding:0;margin-top:16px;font-size:1em;font-style:italic;font-weight:700}html body dl dd{padding:0 16px;margin-bottom:16px}html body code{font-family:Menlo,Monaco,Consolas,'Courier New',monospace;font-size:.85em;color:#000;background-color:#f0f0f0;border-radius:3px;padding:.2em 0}html body code::after,html body code::before{letter-spacing:-.2em;content:'\00a0'}html body pre>code{padding:0;margin:0;word-break:normal;white-space:pre;background:0 0;border:0}html body .highlight{margin-bottom:16px}html body .highlight pre,html body pre{padding:1em;overflow:auto;line-height:1.45;border:#d6d6d6;border-radius:3px}html body .highlight pre{margin-bottom:0;word-break:normal}html body pre code,html body pre 
tt{display:inline;max-width:initial;padding:0;margin:0;overflow:initial;line-height:inherit;word-wrap:normal;background-color:transparent;border:0}html body pre code:after,html body pre code:before,html body pre tt:after,html body pre tt:before{content:normal}html body blockquote,html body dl,html body ol,html body p,html body pre,html body ul{margin-top:0;margin-bottom:16px}html body kbd{color:#000;border:1px solid #d6d6d6;border-bottom:2px solid #c7c7c7;padding:2px 4px;background-color:#f0f0f0;border-radius:3px}@media print{html body{background-color:#fff}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{color:#000;page-break-after:avoid}html body blockquote{color:#5c5c5c}html body pre{page-break-inside:avoid}html body table{display:table}html body img{display:block;max-width:100%;max-height:100%}html body code,html body pre{word-wrap:break-word;white-space:pre}}.markdown-preview{width:100%;height:100%;box-sizing:border-box}.markdown-preview ul{list-style:disc}.markdown-preview ul ul{list-style:circle}.markdown-preview ul ul ul{list-style:square}.markdown-preview ol{list-style:decimal}.markdown-preview ol ol,.markdown-preview ul ol{list-style-type:lower-roman}.markdown-preview ol ol ol,.markdown-preview ol ul ol,.markdown-preview ul ol ol,.markdown-preview ul ul ol{list-style-type:lower-alpha}.markdown-preview .newpage,.markdown-preview .pagebreak{page-break-before:always}.markdown-preview pre.line-numbers{position:relative;padding-left:3.8em;counter-reset:linenumber}.markdown-preview pre.line-numbers>code{position:relative}.markdown-preview pre.line-numbers .line-numbers-rows{position:absolute;pointer-events:none;top:1em;font-size:100%;left:0;width:3em;letter-spacing:-1px;border-right:1px solid #999;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.markdown-preview pre.line-numbers .line-numbers-rows>span{pointer-events:none;display:block;counter-increment:linenumber}.markdown-preview 
pre.line-numbers .line-numbers-rows>span:before{content:counter(linenumber);color:#999;display:block;padding-right:.8em;text-align:right}.markdown-preview .mathjax-exps .MathJax_Display{text-align:center!important}.markdown-preview:not([data-for=preview]) .code-chunk .code-chunk-btn-group{display:none}.markdown-preview:not([data-for=preview]) .code-chunk .status{display:none}.markdown-preview:not([data-for=preview]) .code-chunk .output-div{margin-bottom:16px}.markdown-preview .md-toc{padding:0}.markdown-preview .md-toc .md-toc-link-wrapper .md-toc-link{display:inline;padding:.25rem 0}.markdown-preview .md-toc .md-toc-link-wrapper .md-toc-link div,.markdown-preview .md-toc .md-toc-link-wrapper .md-toc-link p{display:inline}.markdown-preview .md-toc .md-toc-link-wrapper.highlighted .md-toc-link{font-weight:800}.scrollbar-style::-webkit-scrollbar{width:8px}.scrollbar-style::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}.scrollbar-style::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,.66);border:4px solid rgba(150,150,150,.66);background-clip:content-box}html body[for=html-export]:not([data-presentation-mode]){position:relative;width:100%;height:100%;top:0;left:0;margin:0;padding:0;overflow:auto}html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{position:relative;top:0;min-height:100vh}@media screen and (min-width:914px){html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{padding:2em calc(50% - 457px + 2em)}}@media screen and (max-width:914px){html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{font-size:14px!important;padding:1em}}@media print{html body[for=html-export]:not([data-presentation-mode]) #sidebar-toc-btn{display:none}}html body[for=html-export]:not([data-presentation-mode]) 
#sidebar-toc-btn{position:fixed;bottom:8px;left:8px;font-size:28px;cursor:pointer;color:inherit;z-index:99;width:32px;text-align:center;opacity:.4}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] #sidebar-toc-btn{opacity:1}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc{position:fixed;top:0;left:0;width:300px;height:100%;padding:32px 0 48px 0;font-size:14px;box-shadow:0 0 4px rgba(150,150,150,.33);box-sizing:border-box;overflow:auto;background-color:inherit}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar{width:8px}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,.66);border:4px solid rgba(150,150,150,.66);background-clip:content-box}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc a{text-decoration:none}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc{padding:0 16px}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper .md-toc-link{display:inline;padding:.25rem 0}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper .md-toc-link div,html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper .md-toc-link p{display:inline}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper.highlighted .md-toc-link{font-weight:800}html 
body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{left:300px;width:calc(100% - 300px);padding:2em calc(50% - 457px - 300px / 2);margin:0;box-sizing:border-box}@media screen and (max-width:1274px){html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{width:100%}}html body[for=html-export]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .markdown-preview{left:50%;transform:translateX(-50%)}html body[for=html-export]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .md-sidebar-toc{display:none}
/* Please visit the URL below for more information: */
/*   https://shd101wyy.github.io/markdown-preview-enhanced/#/customize-css */

      </style>
      <!-- The content below will be included at the end of the <head> element. --><script type="text/javascript">
  document.addEventListener("DOMContentLoaded", function () {
    // your code here
  });
</script></head><body for="html-export">
    
    
      <div class="crossnote markdown-preview  ">
      
<h1 id="smurf-v30">SMuRF v3.0 </h1>
<p>By <a href="https://github.com/skandlab">Skandlab</a></p>
<p>Genome Institute of Singapore, A*STAR</p>
<p>Check out the <a href="https://github.com/skandlab/SMuRF/releases">latest SMuRF version here</a></p>
<p><a id="home"></a></p>
<h4 id="brintroduction"><br>Introduction </h4>
<p>The <em>SMuRF</em> R package predicts a consensus set of somatic mutation calls using RandomForest machine learning. <em>SMuRF</em> generates a set of point mutations and insertions/deletions (indels) using a model trained on the latest community-curated tumor whole genome sequencing data (Alioto <em>et al.</em>, 2015, Nat. Comms.). Our method is fast and accurate and analyses both whole-genome and whole-exome sequencing data from different cancer types.</p>
<p>For more information see our Bioinformatics paper: <a href="https://doi.org/10.1093/bioinformatics/btz018">https://doi.org/10.1093/bioinformatics/btz018</a></p>
<p><strong>Citation</strong><br>
<br>Huang W, Guo YA, Chang MM and Skanderup AJ. Ensemble-Based Somatic Mutation Calling in Cancer Genomes. In: Boegel S, editor. Bioinformatics for Cancer Immunotherapy: Methods and Protocols. New York, NY: Springer US; 2020. p. 37-46.</p>
<p>Huang W, Guo YA, Muthukumar K, Baruah P, Chang MM and Skanderup AJ. SMuRF: Portable and accurate ensemble prediction of somatic mutations. Bioinformatics (Oxford, England). 2019:btz018-btz. doi:10.1093/bioinformatics/btz018.</p>
<h4 id="brtable-of-contents"><br>Table of contents </h4>
<p><a href="#input-bcbio">Input from bcbio-nextgen pipeline</a><br>
<br><a href="#input-alt">Input directly from VCF Callers (optional)</a><br>
<br><a href="#test">Test Dataset</a><br>
<br><a href="#requirements">Requirements</a><br>
<br><a href="#installation">Installation</a><br>
<br><a href="#functions">Parameters</a><br>
<br><a href="#input">Running SMuRF: Selecting the correct input vcfs</a><br>
<br><a href="#build">Running SMuRF: Detecting and changing genome build</a><br>
<br><a href="#cutoff">Running SMuRF: Tweaking SMuRF score cut-off</a><br>
<br><a href="#output">Output format</a><br>
<br><a href="#multiple-samples">Running on multiple samples</a></p>
<hr>
<p><a id="input-bcbio"></a></p>
<h4 id="brinput-from-bcbio-nextgen-pipeline"><br>Input from bcbio-nextgen pipeline </h4>
<p>Before running <em>SMuRF</em>, you require output data from the <a href="https://bcbio-nextgen.readthedocs.io/en/latest/contents/pipelines.html#cancer-variant-calling">bcbio-nextgen pipeline</a>, which generates the VCF output for the variant callers MuTect2, FreeBayes, VarDict, VarScan and Strelka2. Strelka2 has been included as an additional caller since SMuRF 2.0; the details are documented on our <a href="https://github.com/skandlab/SMuRF/wiki/SMuRF-3.0">wiki page</a>.</p>
<p>SMuRF v1.6.4 is still available here: <a href="https://github.com/skandlab/SMuRF/releases/tag/SMuRFv1.6.4">SMuRFv1.6.4</a><br>
<br>SMuRF v1.6.4 wiki page: <a href="https://github.com/skandlab/SMuRF/wiki/SMuRF-v1.6.4-vignette">readme file</a></p>
<p>Note that your vcf.gz files need to be tab-indexed (.tbi files required) for retrieving gene annotations in SMuRF. We would recommend the bcbio-nextgen pipeline for a better user experience.  See <a href="#input">Running SMuRF: Selecting the correct input vcfs</a> for more information.</p>
<p><em>SMuRF</em> requires the VCF output from each caller (.vcf.gz) to be placed in the same directory, with each file name tagged with its caller (e.g. sample1-mutect.vcf.gz, sample1-freebayes.vcf.gz, sample1-vardict.vcf.gz, sample1-varscan.vcf.gz, sample1-strelka.vcf.gz).</p>
<p><a id="input-alt"></a></p>
<h4 id="brinput-directly-from-vcf-callers-optional"><br>Input directly from VCF Callers (optional) </h4>
<p><strong>For Users not running bcbio-nextgen pipeline:</strong><br>
Alternatively, install and execute the individual callers.</p>
<p>Refer to the installation instructions for each caller:</p>
<ul>
<li><a href="https://github.com/AstraZeneca-NGS/VarDict">VarDict</a></li>
<li><a href="https://github.com/dkoboldt/varscan">VarScan</a></li>
<li><a href="https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_cancer_m2_MuTect2.php">MuTect2</a></li>
<li><a href="https://github.com/ekg/freebayes">FreeBayes</a></li>
<li><a href="https://github.com/Illumina/strelka">Strelka2</a></li>
</ul>
<p><a id="test"></a></p>
<h4 id="brtest-dataset"><br>Test Dataset </h4>
<p>In this vignette, we utilise a <a href="https://github.com/skandlab/SMuRF/tree/master/test">partial output dataset</a> derived from the chronic lymphocytic leukemia (CLL) data downloaded from the European Genome-phenome Archive (EGA) under the accession number EGAS00001001539. The dataset for testing the package is provided in the SMuRF package.</p>
<p><a id="requirements"></a></p>
<h4 id="brrequirements"><br>Requirements </h4>
<p><strong>R 3.3 &amp; 3.4</strong> : bioconductor::VariantAnnotation</p>
<p><strong>R &gt;=3.5</strong> : BiocManager::VariantAnnotation</p>
<p><strong>h2o package</strong> :<br>
<em>If h2o package takes some time to download/install (~350MB), try manually installing from their <a href="https://h2o-release.s3.amazonaws.com/h2o/rel-yau/2/index.html">AWS page.</a></em></p>
<p><a id="installation"></a></p>
<h4 id="brinstallation"><br>Installation </h4>
<ol>
<li>The latest version of the package is available on GitHub: <a href="https://github.com/skandlab/SMuRF">https://github.com/skandlab/SMuRF</a></li>
<li>You can install the current SMuRF directly from GitHub via the following R commands:</li>
</ol>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#devtools is required</span>
install.packages<span class="token punctuation">(</span><span class="token string">"devtools"</span><span class="token punctuation">)</span>
library<span class="token punctuation">(</span>devtools<span class="token punctuation">)</span>
install_github<span class="token punctuation">(</span><span class="token string">"skandlab/SMuRF"</span><span class="token punctuation">,</span> subdir<span class="token operator">=</span><span class="token string">"smurf"</span><span class="token punctuation">)</span>
</code></pre><p><br>(<em>Alternative option</em>) SMuRF installation via downloading of the package from Github:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#Clone or download package from Github https://github.com/skandlab/SMuRF/tree/master/smurf and save to your local directory</span>
install.packages<span class="token punctuation">(</span><span class="token string">"my/current/directory/smurf"</span><span class="token punctuation">,</span> repos <span class="token operator">=</span> <span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> type <span class="token operator">=</span> <span class="token string">"source"</span><span class="token punctuation">)</span>
</code></pre><p><br> <em>SMuRF</em> concurrently predicts somatic single nucleotide variants (SNVs) as well as small insertions and deletions (indels), and saves time by parsing the VCF files only once.</p>
<p><em>Missing packages will be installed the first time you run <em>SMuRF</em>.</em></p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>library<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span> <span class="token comment">#load SMuRF package</span>

smurf<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token comment">#check version and parameters</span>

<span class="token comment"># "SMuRFv3.0.0 (16th Jan 2024)"</span>
smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> mode<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span>
                 annotation<span class="token operator">=</span>F<span class="token punctuation">,</span> output.dir<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span>  parse.dir<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span>
                 snv.cutoff <span class="token operator">=</span> <span class="token string">'default'</span><span class="token punctuation">,</span> indel.cutoff <span class="token operator">=</span> <span class="token string">'default'</span><span class="token punctuation">,</span>
                 build<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> change.build<span class="token operator">=</span>F<span class="token punctuation">,</span> find.build<span class="token operator">=</span>F<span class="token punctuation">,</span>
                 t.label<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> re.tabIndex<span class="token operator">=</span>F<span class="token punctuation">,</span>
                 check.packages<span class="token operator">=</span>T<span class="token punctuation">,</span> file.exclude<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">)</span>

myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>mydir<span class="token punctuation">,</span> <span class="token string">'combined'</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span> <span class="token comment">#save output into 'myresults' variable</span>

</code></pre><p><a href="#home">back to top</a></p>
<p><a id="functions"></a></p>
<h4 id="brparameters"><br>Parameters </h4>
<table>
<thead>
<tr>
<th style="text-align:center">Arguments</th>
<th style="text-align:left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:center">directory</td>
<td style="text-align:left">Choose directory where the Variant Call Format (VCF) files are located</td>
</tr>
<tr>
<td style="text-align:center">output.dir</td>
<td style="text-align:left">Path to output directory (if saving files as .txt)</td>
</tr>
<tr>
<td style="text-align:center">parse.dir</td>
<td style="text-align:left">Specify if changing SMuRF default cutoffs. Path to the location of existing snv-parse.txt and indel-parse.txt files generated by SMuRF</td>
</tr>
<tr>
<td style="text-align:center">mode</td>
<td style="text-align:left">Choose "snv", "indel" or "combined" (snv+indel). "combined" provides a separate list of SNVs and indels.</td>
</tr>
<tr>
<td style="text-align:center">annotation</td>
<td style="text-align:left">TRUE or FALSE (default). Provide gene annotations for each variant call.</td>
</tr>
<tr>
<td style="text-align:center">nthreads</td>
<td style="text-align:left">Number of cores used for RandomForest prediction. Default (-1) for maximum number of cores. <em>For 32-bit Windows, only 1 core is allowed (nthreads=1).</em></td>
</tr>
<tr>
<td style="text-align:center">t.label</td>
<td style="text-align:left">(Optional) Provide the sample name for your tumour sample to ease the identification of the normal and tumour sample names in your vcf</td>
</tr>
<tr>
<td style="text-align:center">file.exclude</td>
<td style="text-align:left">(Optional) Additional keywords; files whose names contain any of them are excluded from the input.</td>
</tr>
<tr>
<td style="text-align:center">build</td>
<td style="text-align:left">Specify your human genome build: build="hg19" or build="hg38"</td>
</tr>
<tr>
<td style="text-align:center">change.build</td>
<td style="text-align:left">TRUE or FALSE (default). For conversion of your genomic coordinates</td>
</tr>
<tr>
<td style="text-align:center">find.build</td>
<td style="text-align:left">TRUE or FALSE (default). Additional genome build check for the annotation step.</td>
</tr>
<tr>
<td style="text-align:center">snv.cutoff</td>
<td style="text-align:left">Default SMuRF_score cutoff for the SNV model unless a number between 0 and 1 is stated</td>
</tr>
<tr>
<td style="text-align:center">indel.cutoff</td>
<td style="text-align:left">Default SMuRF_score cutoff for the INDEL model unless a number between 0 and 1 is stated</td>
</tr>
<tr>
<td style="text-align:center">re.tabIndex</td>
<td style="text-align:left">TRUE or FALSE (default). Set to TRUE to create tab-indexed (.tbi) files for each vcf</td>
</tr>
<tr>
<td style="text-align:center">check.packages</td>
<td style="text-align:left">TRUE (default) or FALSE. Developer mode</td>
</tr>
</tbody>
</table>
<p>For more information on the parameters see R documentation:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>help<span class="token punctuation">(</span>smurf<span class="token punctuation">)</span>
</code></pre>
<p>Examples:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>library<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span> <span class="token comment">#load SMuRF package</span>

 myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token string">"/path/to/directory.."</span><span class="token punctuation">,</span>
                   mode<span class="token operator">=</span><span class="token string">"snv"</span><span class="token punctuation">,</span> <span class="token comment">#snv only</span>
                   output.dir<span class="token operator">=</span><span class="token string">"/path/to/output"</span><span class="token punctuation">,</span> <span class="token comment">#saving your output</span>
                   build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>
 
 <span class="token comment">#Include gene annotations for coding regions in output</span>
 myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token string">"/path/to/directory.."</span><span class="token punctuation">,</span>
                   mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> <span class="token comment">#snv and indel predictions</span>
                   annotation<span class="token operator">=</span>T<span class="token punctuation">,</span> <span class="token comment">#generate gene annotations</span>
                   build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>

</code></pre><p><a href="#home">back to top</a></p>
<p><a id="input"></a></p>
<h4 id="brrunning-smurf-selecting-the-correct-input-vcfs"><br>Running SMuRF: Selecting the correct input vcfs </h4>
<p><br><em>SMuRF</em> requires the VCF files from all 5 callers as input, specified via the "directory" parameter. Provide the path to a directory containing all 5 caller VCF files. Both <strong>caller.vcf.gz</strong> (compressed) and <strong>caller.vcf</strong> (uncompressed) files are accepted.</p>
<p>The tab-indexed (.tbi) files for each caller are required for the parsing step. If the <strong>.tbi</strong> files are missing, set <em>re.tabIndex=T</em> when calling SMuRF to generate them.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode <span class="token operator">=</span><span class="token string">"snv"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span> build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
                  re.tabIndex <span class="token operator">=</span> T<span class="token punctuation">)</span>  <span class="token comment">#generate .tbi files in directory</span>
<span class="token comment">#"Generating .tbi files in directory..."</span>
<span class="token comment"># Connection successful!</span>

<span class="token comment">#If the vcf files are in different directories:</span>

<span class="token comment">#Specify directories manually</span>
 dir.list <span class="token operator">=</span> list<span class="token punctuation">(</span>mutect<span class="token operator">=</span><span class="token string">'/path1/to/mutect.vcf.gz'</span><span class="token punctuation">,</span>
                 freebayes<span class="token operator">=</span><span class="token string">'/path2/to/freebayes.vcf.gz'</span><span class="token punctuation">,</span>
                 vardict<span class="token operator">=</span><span class="token string">'/path3/to/vardict.vcf.gz'</span><span class="token punctuation">,</span>
                 varscan<span class="token operator">=</span><span class="token string">'/path4/to/varscan.vcf.gz'</span><span class="token punctuation">,</span>
                 strelka<span class="token operator">=</span><span class="token string">'/path5/to/strelka.vcf.gz'</span><span class="token punctuation">)</span>
 myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span>dir.list<span class="token punctuation">,</span> 
                   mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>

</code></pre><p><br>In some cases, your input directory may contain other VCF files generated by bcbio, for example germline VCF files, copy-number related files, or VCFs from older versions. An exclusion list, <em>file.exclude</em>, can be added to make sure that SMuRF selects the correct VCF files.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>list.files<span class="token punctuation">(</span>directory<span class="token punctuation">)</span>
<span class="token comment"># sample1.mutect.vcf.gz</span>
<span class="token comment"># sample1.mutect-germline.vcf.gz #to be excluded</span>
<span class="token comment"># sample1.freebayes.vcf.gz</span>
<span class="token comment"># sample1.vardict.vcf.gz</span>
<span class="token comment"># sample1.varscan.vcf.gz</span>
<span class="token comment"># sample1.varscan-version1.vcf.gz #to be excluded</span>
<span class="token comment"># sample1.strelka.vcf.gz</span>
<span class="token comment"># sample1.strelka-archive.vcf.gz #to be excluded</span>

myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token string">"/path/to/directory.."</span><span class="token punctuation">,</span> 
                  file.exclude <span class="token operator">=</span> c<span class="token punctuation">(</span><span class="token string">"germline"</span><span class="token punctuation">,</span><span class="token string">"version1"</span><span class="token punctuation">,</span><span class="token string">"archive"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token comment">#keywords in file name to be excluded</span>
                  mode<span class="token operator">=</span><span class="token string">"snv"</span><span class="token punctuation">,</span>
                  output.dir<span class="token operator">=</span><span class="token string">"/path/to/output"</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>
</code></pre><p><br>It is optional to indicate your normal and tumour sample labels. <em>SMuRF</em> detects your normal and tumour sample names in order to generate variant allele frequency information. If this information is missing in your VCF headers, <em>SMuRF</em> will terminate with an error. State your unique tumour sample label using <em>t.label</em>.</p>
<p>Possible normal/tumour sample labels:</p>
<p>sample1-N, sample1-T<br>
<br>sample1_normal, sample1_tumour<br>
<br>sample1.healthy, sample1.cancer</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
                  t.label <span class="token operator">=</span> <span class="token string">'tumour'</span> <span class="token comment">#optional if labels were detected from vcf headers correctly</span>
                  <span class="token punctuation">)</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="build"></a></p>
<h4 id="brrunning-smurf-detecting-and-changing-genome-build"><br>Running SMuRF: Detecting and changing genome build </h4>
<p><br> The genome build for your sample must be specified ( <em>build='hg19'</em> or <em>build='hg38'</em> ).</p>
<p>hg19 also refers to the Genome Reference Consortium Human Build 37 (GRCh37)<br>
<br>hg38 also refers to the Genome Reference Consortium Human Build 38 (GRCh38)</p>
<p>The genome build stated in <em>SMuRF</em> will be cross-checked with the build used in your VCF files.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span> 
                  build <span class="token operator">=</span> <span class="token string">'hg38'</span> <span class="token comment">#wrong build stated</span>
                  <span class="token punctuation">)</span>
<span class="token comment"># "Genome build stated in SMuRF:"</span>
<span class="token comment"># "hg38"</span>
<span class="token comment"># "Ref genome used in vcf:"</span>
<span class="token comment"># "file:///home/projects/13001264/softwares/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa"</span>
<span class="token comment"># "Warning: build provided does not match ref genome used in vcf. SMuRF CDS annotation may not run properly if genome build is incorrect."</span>
<span class="token comment"># "Final genome build used for analysis: hg38"</span>
<span class="token comment"># </span>
<span class="token comment"># Warning message</span>
</code></pre><p><br>If you are unsure of the genome build used in your analysis, specify <em>find.build=T</em>.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span> 
                  build <span class="token operator">=</span> <span class="token string">'hg38'</span><span class="token punctuation">,</span> <span class="token comment">#wrong build stated</span>
                  find.build <span class="token operator">=</span> T <span class="token comment">#if unsure of genome build</span>
                  <span class="token punctuation">)</span>
<span class="token comment"># "Genome build stated in SMuRF:"</span>
<span class="token comment"># "hg38"</span>
<span class="token comment"># "Ref genome used in vcf:"</span>
<span class="token comment"># "file:///home/projects/13001264/softwares/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa"</span>
<span class="token comment"># "Warning: build provided does not match ref genome used in vcf. SMuRF CDS annotation may not run properly if genome build is incorrect."</span>
<span class="token comment"># "Changing build variable provided"</span>
<span class="token comment"># "hg38 -&gt; hg19"</span>
<span class="token comment"># "Final genome build used for analysis: hg19"</span>

<span class="token comment"># No errors</span>
</code></pre><p><br>Samples from different batches may be aligned to a different genome reference build. In order to standardize your gene annotations and output, specify <em>change.build</em> for genome build conversion.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span> 
                  build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
                  change.build <span class="token operator">=</span> T <span class="token comment">#genome build conversion</span>
                  <span class="token punctuation">)</span>
<span class="token comment"># "Genome build stated in SMuRF:"</span>
<span class="token comment"># "hg19"</span>
<span class="token comment"># "Ref genome used in vcf:"</span>
<span class="token comment"># "file:///home/projects/13001264/softwares/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa"</span>
<span class="token comment"># "Final genome build used for analysis: hg19"</span>

<span class="token comment"># "Compiling annotations"</span>
<span class="token comment"># "Changing annotations from hg19 to hg38"</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="cutoff"></a></p>
<h4 id="brrunning-smurf-tweaking-smurf-score-cut-off"><br>Running SMuRF: Tweaking SMuRF score cut-off </h4>
<p><br><em>SMuRF</em> v3.0.0 is fine-tuned to achieve the maximum F1 score in our test set.</p>
<p>Re-adjust the stringency of the prediction with a specific cut-off value.<br>
Use the parameters <em>snv.cutoff</em> or <em>indel.cutoff</em> to adjust the thresholds (a higher cut-off provides a smaller set of calls with higher confidence).</p>
<p>To re-adjust the cut-off value of an <strong>existing</strong> SMuRF output, set <em>parse.dir</em> to the directory containing the snv-parse and indel-parse files for re-processing.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#start with default cutoffs</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> 
                  snv.cutoff<span class="token operator">=</span><span class="token string">'default'</span><span class="token punctuation">,</span> indel.cutoff<span class="token operator">=</span><span class="token string">'default'</span><span class="token punctuation">,</span>
                  output.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults'</span><span class="token punctuation">)</span> 

<span class="token comment">#modify cutoff from existing SMuRF parse files</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span>
                  snv.cutoff<span class="token operator">=</span><span class="token number">0.2</span><span class="token punctuation">,</span> indel.cutoff<span class="token operator">=</span><span class="token number">0.1</span><span class="token punctuation">,</span> <span class="token comment">#specify new cutoffs</span>
                  parse.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults'</span><span class="token punctuation">,</span> <span class="token comment">#path to existing SMuRF parse.txt files</span>
                  output.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults2'</span><span class="token punctuation">)</span> <span class="token comment">#new output</span>

<span class="token comment">#Plot histogram</span>
hist<span class="token punctuation">(</span>as.numeric<span class="token punctuation">(</span>myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>predicted_indel<span class="token punctuation">[</span><span class="token punctuation">,</span><span class="token string">'SMuRF_score'</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span> main <span class="token operator">=</span> <span class="token string">'Re-adjusted predicted indels'</span><span class="token punctuation">,</span> xlab <span class="token operator">=</span> <span class="token string">'SMuRF_score'</span><span class="token punctuation">,</span> col <span class="token operator">=</span> <span class="token string">'grey50'</span><span class="token punctuation">)</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="output"></a></p>
<h4 id="broutput-format"><br>Output format </h4>
<p>Output files available include:</p>
<ol>
<li>
<p>Parsed-raw file (<em>parse</em>)</p>
</li>
<li>
<p>Predicted positive mutations (<em>predicted</em>)</p>
</li>
<li>
<p>Predicted positive mutations with annotations (<em>annotated</em>)* #for smurf's "cdsannotation" function only</p>
</li>
<li>
<p>Variant statistics (<em>stats</em>)</p>
</li>
<li>
<p>Time taken (<em>time</em>)</p>
</li>
</ol>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#Viewing predicted output in R</span>

myresults<span class="token operator">$</span>smurf_snv<span class="token operator">$</span>predicted_snv

myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>predicted_indel

<span class="token comment">#see column description below</span>
</code></pre><table>
<thead>
<tr>
<th>Column</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>Chr</td>
<td>Chromosome number</td>
</tr>
<tr>
<td>START_POS_REF/END_POS_REF</td>
<td>Start and End nucleotide position of the somatic mutation</td>
</tr>
<tr>
<td>REF/ALT</td>
<td>Consensus Ref and Alt nucleotide changes of the highest likelihood</td>
</tr>
<tr>
<td>REF_MFVdVs/ALT_MFVdVs</td>
<td>Reference and Alternative nucleotide changes from each caller; Mutect2 (M), Freebayes (F), Vardict (Vd), Varscan (Vs) and Strelka2 (not abbreviated to preserve column name)</td>
</tr>
<tr>
<td>FILTER</td>
<td>Pass (TRUE) or Reject (FALSE) [boolean] mutation calls from the individual callers</td>
</tr>
<tr>
<td>Sample_Name</td>
<td>Sample name is extracted based on your labeled samples in the vcf files</td>
</tr>
<tr>
<td>Alt_Allele_Freq</td>
<td>Mean Variant allele frequency calculated from the tumor reads of the callers</td>
</tr>
<tr>
<td>Depth ref/alt N/T</td>
<td>Mean read depth from the N/T sample for ref/alt alleles</td>
</tr>
<tr>
<td>SMuRF_score</td>
<td>SMuRF confidence score of the predicted mutation</td>
</tr>
</tbody>
</table>
<br>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>stats_indel

<span class="token comment">#             Passed_Calls</span>
<span class="token comment"># Strelka2             466</span>
<span class="token comment"># Mutect2              232</span>
<span class="token comment"># FreeBayes            306</span>
<span class="token comment"># VarDict              483</span>
<span class="token comment"># VarScan             1273</span>
<span class="token comment"># Atleast1            2431</span>
<span class="token comment"># Atleast2             278</span>
<span class="token comment"># Atleast3              43</span>
<span class="token comment"># Atleast4               7</span>
<span class="token comment"># All5                   1</span>
<span class="token comment"># SMuRF_INDEL           88</span>

myresults<span class="token operator">$</span>smurf_snv<span class="token operator">$</span>stats_snv

<span class="token comment">#           Passed_Calls</span>
<span class="token comment"># Strelka2          1362</span>
<span class="token comment"># Mutect2           1539</span>
<span class="token comment"># FreeBayes          216</span>
<span class="token comment"># VarDict            239</span>
<span class="token comment"># VarScan           1734</span>
<span class="token comment"># Atleast1          4017</span>
<span class="token comment"># Atleast2           928</span>
<span class="token comment"># Atleast3            60</span>
<span class="token comment"># Atleast4            48</span>
<span class="token comment"># All5                37</span>
<span class="token comment"># SMuRF_SNV         1043</span>
</code></pre><p><br>We added gene annotations using SnpEff (from bcbio) and <em>SMuRF</em> extracts the coding annotations from the canonical transcripts with the highest functional impact. Take note that your vcf.gz files should be tab-indexed (see <a href="#input">Running SMuRF: re.tabIndex</a>).</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>mydir<span class="token punctuation">,</span> <span class="token string">"cdsannotation"</span><span class="token punctuation">)</span> <span class="token comment">#runs SMuRF for SNV and indels + generate annotations</span>

myresults<span class="token operator">$</span>smurf_snv_annotation<span class="token operator">$</span>annotated<span class="token punctuation">[</span>order<span class="token punctuation">(</span>myresults<span class="token operator">$</span>smurf_snv_annotation<span class="token operator">$</span>annotated<span class="token operator">$</span>REGION<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token operator">:</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span><span class="token punctuation">]</span>
<span class="token comment">#    Chr START_POS_REF END_POS_REF REF ALT   REF_MFVdVs   ALT_MFVdVs FILTER_Mutect2 FILTER_Freebayes FILTER_Vardict</span>
<span class="token comment"># 52   1      77806132    77806132   G   A    G/G/G/G/G    A/A/A/A/A           TRUE             TRUE           TRUE</span>
<span class="token comment"># 81   1     170961432   170961432   C   T C/NA/NA/NA/C T/NA/NA/NA/T           TRUE            FALSE          FALSE</span>
<span class="token comment">#    FILTER_Varscan FILTER_Strelka2     Sample_Name Alt_Allele_Freq N_refDepth N_altDepth T_refDepth T_altDepth Allele</span>
<span class="token comment"># 52           TRUE            TRUE icgc_cll_tumour           0.500         14          0         15         15      A</span>
<span class="token comment"># 81          FALSE            TRUE icgc_cll_tumour           0.467         33          0         16         14      T</span>
<span class="token comment">#          Annotation   Impact Gene_name         Gene_ID Feature_Type      Feature_ID Transcript_BioType  Rank    HGVS.c</span>
<span class="token comment"># 52 missense_variant MODERATE       AK5 ENSG00000154027   transcript ENST00000354567     protein_coding  6/14  c.770G&gt;A</span>
<span class="token comment"># 81 missense_variant MODERATE     MROH9 ENSG00000117501   transcript ENST00000367759     protein_coding 12/22 c.1156C&gt;T</span>
<span class="token comment">#         HGVS.p  cDNA.pos   CDS.pos  AA.pos Distance REGION SMuRF_score</span>
<span class="token comment"># 52 p.Arg257His 1033/3248  770/1689 257/562        .    CDS   0.9083840</span>
<span class="token comment"># 81 p.Arg386Cys 1310/3165 1156/2586 386/861        .    CDS   0.8107475</span>
</code></pre><p><br>Time taken for your run:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults<span class="token operator">$</span>time.taken

<span class="token operator">&lt;</span><span class="token operator">!</span><span class="token operator">-</span><span class="token operator">-</span> Time difference of <span class="token number">20.52405</span> secs <span class="token operator">-</span><span class="token operator">-&gt;</span>
</code></pre><p><br>The raw parsed output:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>parse_indel

myresults<span class="token operator">$</span>smurf_snv<span class="token operator">$</span>parse_snv

</code></pre><p><br>Indicate the <em>output.dir</em> to save the <em>SMuRF</em> output as tab-delimited .txt files in your targeted directory.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
                  mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> 
                  output.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults'</span> <span class="token comment">#path to output directory</span>
                  <span class="token punctuation">)</span> 
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="multiple-samples"></a></p>
<h4 id="brrunning-on-multiple-samples"><br>Running on multiple samples </h4>
<p>Iterate over multiple samples by providing the list of directories where your sample files are located.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>project.dir <span class="token operator">=</span> <span class="token string">'path/to/my/dir'</span>
list.files<span class="token punctuation">(</span>project.dir<span class="token punctuation">)</span>
<span class="token comment"># sample_A</span>
<span class="token comment"># sample_B</span>
<span class="token comment"># sample_C</span>
samples <span class="token operator">=</span> c<span class="token punctuation">(</span><span class="token string">'sample_A'</span><span class="token punctuation">,</span> <span class="token string">'sample_B'</span><span class="token punctuation">,</span> <span class="token string">'sample_C'</span><span class="token punctuation">)</span> <span class="token comment">#sample dir where vcf files are located</span>

<span class="token keyword keyword-for">for</span><span class="token punctuation">(</span>i <span class="token keyword keyword-in">in</span> <span class="token number">1</span><span class="token operator">:</span>length<span class="token punctuation">(</span>samples<span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
 smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span>paste0<span class="token punctuation">(</span>project.dir<span class="token punctuation">,</span> <span class="token string">'/'</span><span class="token punctuation">,</span> samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
        mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span>
        output.dir <span class="token operator">=</span> paste0<span class="token punctuation">(</span><span class="token string">'C:/Users/admin/myresults/'</span><span class="token punctuation">,</span>samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
 <span class="token punctuation">}</span> 
</code></pre><p>Running SMuRF on multiple samples on a cluster (parallel multi-core instance)</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>install.packages<span class="token punctuation">(</span><span class="token string">"foreach"</span><span class="token punctuation">)</span>
install.packages<span class="token punctuation">(</span><span class="token string">"doParallel"</span><span class="token punctuation">)</span>
install.packages<span class="token punctuation">(</span><span class="token string">"doSNOW"</span><span class="token punctuation">)</span>

library<span class="token punctuation">(</span>foreach<span class="token punctuation">)</span>
library<span class="token punctuation">(</span>doParallel<span class="token punctuation">)</span>
library<span class="token punctuation">(</span>doSNOW<span class="token punctuation">)</span>
library<span class="token punctuation">(</span>smurf<span class="token punctuation">)</span>

project.dir <span class="token operator">=</span> <span class="token string">'path/to/my/dir'</span>
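samples <span class="token operator">=</span> basename<span class="token punctuation">(</span>Sys.glob<span class="token punctuation">(</span>paste0<span class="token punctuation">(</span>project.dir<span class="token punctuation">,</span><span class="token string">'/*'</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token comment">#sample directory names (without the project.dir prefix)</span>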

<span class="token comment">#setup parallel backend to use many processors</span>
cores<span class="token operator">=</span>detectCores<span class="token punctuation">(</span><span class="token punctuation">)</span>
cl <span class="token operator">&lt;-</span> makeCluster<span class="token punctuation">(</span>cores<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span> <span class="token comment">#not to overload your computer</span>
registerDoParallel<span class="token punctuation">(</span>cl<span class="token punctuation">)</span>

foreach<span class="token punctuation">(</span>i<span class="token operator">=</span><span class="token number">1</span><span class="token operator">:</span>length<span class="token punctuation">(</span>samples<span class="token punctuation">)</span><span class="token punctuation">,</span> .packages <span class="token operator">=</span> <span class="token string">'smurf'</span><span class="token punctuation">,</span> .verbose <span class="token operator">=</span> F<span class="token punctuation">)</span> <span class="token percent-operator operator">%dopar%</span> <span class="token punctuation">{</span>
print<span class="token punctuation">(</span>i<span class="token punctuation">)</span>
  smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>project.dir<span class="token punctuation">,</span> <span class="token string">'/'</span><span class="token punctuation">,</span> samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
      mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
      output.dir <span class="token operator">=</span> paste0<span class="token punctuation">(</span><span class="token string">'C:/Users/admin/myresults/'</span><span class="token punctuation">,</span>samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token punctuation">}</span>
stopCluster<span class="token punctuation">(</span>cl<span class="token punctuation">)</span>
h2o.shutdown<span class="token punctuation">(</span><span class="token punctuation">)</span>
</code></pre><br>
Please report errors and bugs on our GitHub page.
<p><a href="#home">back to top</a></p>

      </div>
      
      
    
    
    
    
    
    
  
    </body></html>