forked from skandlab/SMuRF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Readme.html
504 lines (454 loc) · 61.4 KB
/
Readme.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
<!DOCTYPE html><html lang="en"><head>
<title>Readme</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
<style>
code[class*=language-],pre[class*=language-]{color:#333;background:0 0;font-family:Consolas,"Liberation Mono",Menlo,Courier,monospace;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.4;-moz-tab-size:8;-o-tab-size:8;tab-size:8;-webkit-hyphens:none;-moz-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]{padding:.8em;overflow:auto;border-radius:3px;background:#f5f5f5}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal;background:#f5f5f5}.token.blockquote,.token.comment{color:#969896}.token.cdata{color:#183691}.token.doctype,.token.macro.property,.token.punctuation,.token.variable{color:#333}.token.builtin,.token.important,.token.keyword,.token.operator,.token.rule{color:#a71d5d}.token.attr-value,.token.regex,.token.string,.token.url{color:#183691}.token.atrule,.token.boolean,.token.code,.token.command,.token.constant,.token.entity,.token.number,.token.property,.token.symbol{color:#0086b3}.token.prolog,.token.selector,.token.tag{color:#63a35c}.token.attr-name,.token.class,.token.class-name,.token.function,.token.id,.token.namespace,.token.pseudo-class,.token.pseudo-element,.token.url-reference .token.variable{color:#795da3}.token.entity{cursor:help}.token.title,.token.title .token.punctuation{font-weight:700;color:#1d3e81}.token.list{color:#ed6a43}.token.inserted{background-color:#eaffea;color:#55a532}.token.deleted{background-color:#ffecec;color:#bd2c00}.token.bold{font-weight:700}.token.italic{font-style:italic}.language-json .token.property{color:#183691}.language-markup .token.tag .token.punctuation{color:#333}.language-css .token.function,code.language-css{color:#0086b3}.language-yaml .token.atrule{color:#63a35c}code.language-yaml{color:#183691}.language-ruby .token.function{color:#333}.language-markdown .token.url{color:#795da3}.language-makefile .token.symbol{color:#795da3}.language-makefile .token.variable{color:#183691}.language-makefile 
.token.builtin{color:#0086b3}.language-bash .token.keyword{color:#0086b3}pre[data-line]{position:relative;padding:1em 0 1em 3em}pre[data-line] .line-highlight-wrapper{position:absolute;top:0;left:0;background-color:transparent;display:block;width:100%}pre[data-line] .line-highlight{position:absolute;left:0;right:0;padding:inherit 0;margin-top:1em;background:hsla(24,20%,50%,.08);background:linear-gradient(to right,hsla(24,20%,50%,.1) 70%,hsla(24,20%,50%,0));pointer-events:none;line-height:inherit;white-space:pre}pre[data-line] .line-highlight:before,pre[data-line] .line-highlight[data-end]:after{content:attr(data-start);position:absolute;top:.4em;left:.6em;min-width:1em;padding:0 .5em;background-color:hsla(24,20%,50%,.4);color:#f4f1ef;font:bold 65%/1.5 sans-serif;text-align:center;vertical-align:.3em;border-radius:999px;text-shadow:none;box-shadow:0 1px #fff}pre[data-line] .line-highlight[data-end]:after{content:attr(data-end);top:auto;bottom:.4em}html body{font-family:'Helvetica Neue',Helvetica,'Segoe UI',Arial,freesans,sans-serif;font-size:16px;line-height:1.6;color:#333;background-color:#fff;overflow:initial;box-sizing:border-box;word-wrap:break-word}html body>:first-child{margin-top:0}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{line-height:1.2;margin-top:1em;margin-bottom:16px;color:#000}html body h1{font-size:2.25em;font-weight:300;padding-bottom:.3em}html body h2{font-size:1.75em;font-weight:400;padding-bottom:.3em}html body h3{font-size:1.5em;font-weight:500}html body h4{font-size:1.25em;font-weight:600}html body h5{font-size:1.1em;font-weight:600}html body h6{font-size:1em;font-weight:600}html body h1,html body h2,html body h3,html body h4,html body h5{font-weight:600}html body h5{font-size:1em}html body h6{color:#5c5c5c}html body strong{color:#000}html body del{color:#5c5c5c}html body a:not([href]){color:inherit;text-decoration:none}html body a{color:#08c;text-decoration:none}html body 
a:hover{color:#00a3f5;text-decoration:none}html body img{max-width:100%}html body>p{margin-top:0;margin-bottom:16px;word-wrap:break-word}html body>ol,html body>ul{margin-bottom:16px}html body ol,html body ul{padding-left:2em}html body ol.no-list,html body ul.no-list{padding:0;list-style-type:none}html body ol ol,html body ol ul,html body ul ol,html body ul ul{margin-top:0;margin-bottom:0}html body li{margin-bottom:0}html body li.task-list-item{list-style:none}html body li>p{margin-top:0;margin-bottom:0}html body .task-list-item-checkbox{margin:0 .2em .25em -1.8em;vertical-align:middle}html body .task-list-item-checkbox:hover{cursor:pointer}html body blockquote{margin:16px 0;font-size:inherit;padding:0 15px;color:#5c5c5c;background-color:#f0f0f0;border-left:4px solid #d6d6d6}html body blockquote>:first-child{margin-top:0}html body blockquote>:last-child{margin-bottom:0}html body hr{height:4px;margin:32px 0;background-color:#d6d6d6;border:0 none}html body table{margin:10px 0 15px 0;border-collapse:collapse;border-spacing:0;display:block;width:100%;overflow:auto;word-break:normal;word-break:keep-all}html body table th{font-weight:700;color:#000}html body table td,html body table th{border:1px solid #d6d6d6;padding:6px 13px}html body dl{padding:0}html body dl dt{padding:0;margin-top:16px;font-size:1em;font-style:italic;font-weight:700}html body dl dd{padding:0 16px;margin-bottom:16px}html body code{font-family:Menlo,Monaco,Consolas,'Courier New',monospace;font-size:.85em;color:#000;background-color:#f0f0f0;border-radius:3px;padding:.2em 0}html body code::after,html body code::before{letter-spacing:-.2em;content:'\00a0'}html body pre>code{padding:0;margin:0;word-break:normal;white-space:pre;background:0 0;border:0}html body .highlight{margin-bottom:16px}html body .highlight pre,html body pre{padding:1em;overflow:auto;line-height:1.45;border:#d6d6d6;border-radius:3px}html body .highlight pre{margin-bottom:0;word-break:normal}html body pre code,html body pre 
tt{display:inline;max-width:initial;padding:0;margin:0;overflow:initial;line-height:inherit;word-wrap:normal;background-color:transparent;border:0}html body pre code:after,html body pre code:before,html body pre tt:after,html body pre tt:before{content:normal}html body blockquote,html body dl,html body ol,html body p,html body pre,html body ul{margin-top:0;margin-bottom:16px}html body kbd{color:#000;border:1px solid #d6d6d6;border-bottom:2px solid #c7c7c7;padding:2px 4px;background-color:#f0f0f0;border-radius:3px}@media print{html body{background-color:#fff}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{color:#000;page-break-after:avoid}html body blockquote{color:#5c5c5c}html body pre{page-break-inside:avoid}html body table{display:table}html body img{display:block;max-width:100%;max-height:100%}html body code,html body pre{word-wrap:break-word;white-space:pre}}.markdown-preview{width:100%;height:100%;box-sizing:border-box}.markdown-preview ul{list-style:disc}.markdown-preview ul ul{list-style:circle}.markdown-preview ul ul ul{list-style:square}.markdown-preview ol{list-style:decimal}.markdown-preview ol ol,.markdown-preview ul ol{list-style-type:lower-roman}.markdown-preview ol ol ol,.markdown-preview ol ul ol,.markdown-preview ul ol ol,.markdown-preview ul ul ol{list-style-type:lower-alpha}.markdown-preview .newpage,.markdown-preview .pagebreak{page-break-before:always}.markdown-preview pre.line-numbers{position:relative;padding-left:3.8em;counter-reset:linenumber}.markdown-preview pre.line-numbers>code{position:relative}.markdown-preview pre.line-numbers .line-numbers-rows{position:absolute;pointer-events:none;top:1em;font-size:100%;left:0;width:3em;letter-spacing:-1px;border-right:1px solid #999;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.markdown-preview pre.line-numbers .line-numbers-rows>span{pointer-events:none;display:block;counter-increment:linenumber}.markdown-preview 
pre.line-numbers .line-numbers-rows>span:before{content:counter(linenumber);color:#999;display:block;padding-right:.8em;text-align:right}.markdown-preview .mathjax-exps .MathJax_Display{text-align:center!important}.markdown-preview:not([data-for=preview]) .code-chunk .code-chunk-btn-group{display:none}.markdown-preview:not([data-for=preview]) .code-chunk .status{display:none}.markdown-preview:not([data-for=preview]) .code-chunk .output-div{margin-bottom:16px}.markdown-preview .md-toc{padding:0}.markdown-preview .md-toc .md-toc-link-wrapper .md-toc-link{display:inline;padding:.25rem 0}.markdown-preview .md-toc .md-toc-link-wrapper .md-toc-link div,.markdown-preview .md-toc .md-toc-link-wrapper .md-toc-link p{display:inline}.markdown-preview .md-toc .md-toc-link-wrapper.highlighted .md-toc-link{font-weight:800}.scrollbar-style::-webkit-scrollbar{width:8px}.scrollbar-style::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}.scrollbar-style::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,.66);border:4px solid rgba(150,150,150,.66);background-clip:content-box}html body[for=html-export]:not([data-presentation-mode]){position:relative;width:100%;height:100%;top:0;left:0;margin:0;padding:0;overflow:auto}html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{position:relative;top:0;min-height:100vh}@media screen and (min-width:914px){html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{padding:2em calc(50% - 457px + 2em)}}@media screen and (max-width:914px){html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for=html-export]:not([data-presentation-mode]) .markdown-preview{font-size:14px!important;padding:1em}}@media print{html body[for=html-export]:not([data-presentation-mode]) #sidebar-toc-btn{display:none}}html body[for=html-export]:not([data-presentation-mode]) 
#sidebar-toc-btn{position:fixed;bottom:8px;left:8px;font-size:28px;cursor:pointer;color:inherit;z-index:99;width:32px;text-align:center;opacity:.4}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] #sidebar-toc-btn{opacity:1}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc{position:fixed;top:0;left:0;width:300px;height:100%;padding:32px 0 48px 0;font-size:14px;box-shadow:0 0 4px rgba(150,150,150,.33);box-sizing:border-box;overflow:auto;background-color:inherit}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar{width:8px}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,.66);border:4px solid rgba(150,150,150,.66);background-clip:content-box}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc a{text-decoration:none}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc{padding:0 16px}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper .md-toc-link{display:inline;padding:.25rem 0}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper .md-toc-link div,html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper .md-toc-link p{display:inline}html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc .md-toc .md-toc-link-wrapper.highlighted .md-toc-link{font-weight:800}html 
body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{left:300px;width:calc(100% - 300px);padding:2em calc(50% - 457px - 300px / 2);margin:0;box-sizing:border-box}@media screen and (max-width:1274px){html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for=html-export]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{width:100%}}html body[for=html-export]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .markdown-preview{left:50%;transform:translateX(-50%)}html body[for=html-export]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .md-sidebar-toc{display:none}
/* Please visit the URL below for more information: */
/* https://shd101wyy.github.io/markdown-preview-enhanced/#/customize-css */
</style>
<!-- The content below will be included at the end of the <head> element. --><script type="text/javascript">
document.addEventListener("DOMContentLoaded", function () {
// your code here
});
</script></head><body for="html-export">
<div class="crossnote markdown-preview ">
<h1 id="smurf-v30">SMuRF v3.0 </h1>
<p>By <a href="https://github.com/skandlab">Skandlab</a></p>
<p>Genome Institute of Singapore, A*STAR</p>
<p>Check out the <a href="https://github.com/skandlab/SMuRF/releases">latest SMuRF version here</a></p>
<p><a id="home"></a></p>
<h4 id="brintroduction"><br>Introduction </h4>
<p><em>SMuRF</em> R package predicts a consensus set of somatic mutation calls using RandomForest machine learning. <em>SMuRF</em> generates a set of point mutations and insertions/deletions (indels) trained on the latest community-curated tumor whole genome sequencing data (Alioto <em>et al.</em>, 2015, Nat. Comms.). Our method is fast and accurate and analyses both whole-genome and whole-exome sequencing data from different cancer types.</p>
<p>For more information see our Bioinformatics paper: <a href="https://doi.org/10.1093/bioinformatics/btz018">https://doi.org/10.1093/bioinformatics/btz018</a></p>
<p><strong>Citation</strong><br>
<br>Huang W, Guo YA, Chang MM and Skanderup AJ. Ensemble-Based Somatic Mutation Calling in Cancer Genomes. In: Boegel S, editor. Bioinformatics for Cancer Immunotherapy: Methods and Protocols. New York, NY: Springer US; 2020. p. 37-46.</p>
<p>Huang W, Guo YA, Muthukumar K, Baruah P, Chang MM and Skanderup AJ. SMuRF: Portable and accurate ensemble prediction of somatic mutations. Bioinformatics (Oxford, England). 2019:btz018-btz. doi:10.1093/bioinformatics/btz018.</p>
<h4 id="brtable-of-contents"><br>Table of contents </h4>
<p><a href="#input-bcbio">Input from bcbio-nextgen pipeline</a><br>
<br><a href="#input-alt">Input directly from VCF Callers (optional)</a><br>
<br><a href="#test">Test Dataset</a><br>
<br><a href="#requirements">Requirements</a><br>
<br><a href="#installation">Installation</a><br>
<br><a href="#functions">Parameters</a><br>
<br><a href="#input">Running SMuRF: Selecting the correct input vcfs</a><br>
<br><a href="#build">Running SMuRF: Detecting and changing genome build</a><br>
<br><a href="#cutoff">Running SMuRF: Tweaking SMuRF score cut-off</a><br>
<br><a href="#output">Output format</a><br>
<br><a href="#multiple-samples">Running on multiple samples</a></p>
<hr>
<p><a id="input-bcbio"></a></p>
<h4 id="brinput-from-bcbio-nextgen-pipeline"><br>Input from bcbio-nextgen pipeline </h4>
<p>Before running <em>SMuRF</em>, you require output data from the <a href="https://bcbio-nextgen.readthedocs.io/en/latest/contents/pipelines.html#cancer-variant-calling">bcbio-nextgen pipeline</a>, which generates the VCF output for the variant callers MuTect2, FreeBayes, VarDict, VarScan and Strelka2. Strelka2 is an additional caller that has been supported since SMuRF 2.0; this is documented on our <a href="https://github.com/skandlab/SMuRF/wiki/SMuRF-3.0">wiki page</a>.</p>
<p>SMuRF v1.6.4 is still available here: <a href="https://github.com/skandlab/SMuRF/releases/tag/SMuRFv1.6.4">SMuRFv1.6.4</a><br>
<br>SMuRF v1.6.4 wiki page: <a href="https://github.com/skandlab/SMuRF/wiki/SMuRF-v1.6.4-vignette">readme file</a></p>
<p>Note that your vcf.gz files need to be tab-indexed (.tbi files required) for retrieving gene annotations in SMuRF. We would recommend the bcbio-nextgen pipeline for a better user experience. See <a href="#input">Running SMuRF: Selecting the correct input vcfs</a> for more information.</p>
<p><em>SMuRF</em> requires the VCF output from each caller (.vcf.gz) to be placed in the same directory, with each file tagged with its caller (e.g. sample1-mutect.vcf.gz, sample1-freebayes.vcf.gz, sample1-vardict.vcf.gz, sample1-varscan.vcf.gz).</p>
<p><a id="input-alt"></a></p>
<h4 id="brinput-directly-from-vcf-callers-optional"><br>Input directly from VCF Callers (optional) </h4>
<p><strong>For Users not running bcbio-nextgen pipeline:</strong><br>
Alternatively, install and execute the individual callers.</p>
<p>Refer to the installation instructions for each caller:</p>
<ul>
<li><a href="https://github.com/AstraZeneca-NGS/VarDict">VarDict</a></li>
<li><a href="https://github.com/dkoboldt/varscan">VarScan</a></li>
<li><a href="https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_cancer_m2_MuTect2.php">MuTect2</a></li>
<li><a href="https://github.com/ekg/freebayes">FreeBayes</a></li>
<li><a href="https://github.com/Illumina/strelka">Strelka2</a></li>
</ul>
<p><a id="test"></a></p>
<h4 id="brtest-dataset"><br>Test Dataset </h4>
<p>In this vignette, we utilise a <a href="https://github.com/skandlab/SMuRF/tree/master/test">partial output dataset</a> derived from the chronic lymphocytic leukemia (CLL) data downloaded from the European Genome-phenome Archive (EGA) under the accession number EGAS00001001539. The dataset for testing the package is provided in the SMuRF package.</p>
<p><a id="requirements"></a></p>
<h4 id="brrequirements"><br>Requirements </h4>
<p><strong>R 3.3 &amp; 3.4</strong> : bioconductor::VariantAnnotation</p>
<p><strong>R &gt;=3.5</strong> : BiocManager::VariantAnnotation</p>
<p><strong>h2o package</strong> :<br>
<em>If h2o package takes some time to download/install (~350MB), try manually installing from their <a href="https://h2o-release.s3.amazonaws.com/h2o/rel-yau/2/index.html">AWS page.</a></em></p>
<p><a id="installation"></a></p>
<h4 id="brinstallation"><br>Installation </h4>
<ol>
<li>The latest version of the package is available on GitHub: <a href="https://github.com/skandlab/SMuRF">https://github.com/skandlab/SMuRF</a></li>
<li>You can install the current SMuRF directly from GitHub via the following R commands:</li>
</ol>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#devtools is required</span>
install.packages<span class="token punctuation">(</span><span class="token string">"devtools"</span><span class="token punctuation">)</span>
library<span class="token punctuation">(</span>devtools<span class="token punctuation">)</span>
install_github<span class="token punctuation">(</span><span class="token string">"skandlab/SMuRF"</span><span class="token punctuation">,</span> subdir<span class="token operator">=</span><span class="token string">"smurf"</span><span class="token punctuation">)</span>
</code></pre><p><br>(<em>Alternative option</em>) SMuRF installation via downloading of the package from Github:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#Clone or download package from Github https://github.com/skandlab/SMuRF/tree/master/smurf and save to your local directory</span>
install.packages<span class="token punctuation">(</span><span class="token string">"my/current/directory/smurf"</span><span class="token punctuation">,</span> repos <span class="token operator">=</span> <span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> type <span class="token operator">=</span> <span class="token string">"source"</span><span class="token punctuation">)</span>
</code></pre><p><br> <em>SMuRF</em> concurrently predicts somatic single nucleotide variants (SNVs) as well as small insertions and deletions (indels) and saves time by parsing the VCF files once.</p>
<p><em>Missing packages will be installed the first time you run <em>SMuRF</em>.</em></p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>library<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span> <span class="token comment">#load SMuRF package</span>
smurf<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token comment">#check version and parameters</span>
<span class="token comment"># "SMuRFv3.0.0 (16th Jan 2024)"</span>
smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> mode<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">,</span>
annotation<span class="token operator">=</span>F<span class="token punctuation">,</span> output.dir<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> parse.dir<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span>
snv.cutoff <span class="token operator">=</span> <span class="token string">'default'</span><span class="token punctuation">,</span> indel.cutoff <span class="token operator">=</span> <span class="token string">'default'</span><span class="token punctuation">,</span>
build<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> change.build<span class="token operator">=</span>F<span class="token punctuation">,</span> find.build<span class="token operator">=</span>F<span class="token punctuation">,</span>
t.label<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">,</span> re.tabIndex<span class="token operator">=</span>F<span class="token punctuation">,</span>
check.packages<span class="token operator">=</span>T<span class="token punctuation">,</span> file.exclude<span class="token operator">=</span><span class="token keyword keyword-NULL">NULL</span><span class="token punctuation">)</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>mydir<span class="token punctuation">,</span> <span class="token string">'combined'</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span> <span class="token comment">#save output into 'myresults' variable</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a id="functions"></a></p>
<h4 id="brparameters"><br>Parameters </h4>
<table>
<thead>
<tr>
<th style="text-align:center">Arguments</th>
<th style="text-align:left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:center">directory</td>
<td style="text-align:left">Choose directory where the Variant Call Format (VCF) files are located</td>
</tr>
<tr>
<td style="text-align:center">output.dir</td>
<td style="text-align:left">Path to output directory (if saving files as .txt)</td>
</tr>
<tr>
<td style="text-align:center">parse.dir</td>
<td style="text-align:left">Specify if changing SMuRF default cutoffs. Path to the location of existing snv-parse.txt and indel-parse.txt files generated by SMuRF</td>
</tr>
<tr>
<td style="text-align:center">mode</td>
<td style="text-align:left">Choose "snv", "indel" or "combined" (snv+indel). "combined" provides a separate list of SNVs and indels.</td>
</tr>
<tr>
<td style="text-align:center">annotation</td>
<td style="text-align:left">TRUE or FALSE (default). Provide gene annotations for each variant call.</td>
</tr>
<tr>
<td style="text-align:center">nthreads</td>
<td style="text-align:left">Number of cores used for RandomForest prediction. Default (-1) for maximum number of cores. <em>For 32-bit Windows, only 1 core is allowed (nthreads=1).</em></td>
</tr>
<tr>
<td style="text-align:center">t.label</td>
<td style="text-align:left">(Optional) Provide the sample name for your tumour sample to ease the identification of the normal and tumour sample names in your vcf</td>
</tr>
<tr>
<td style="text-align:center">file.exclude</td>
<td style="text-align:left">(Optional) Additional keywords in file directory names to be filtered.</td>
</tr>
<tr>
<td style="text-align:center">build</td>
<td style="text-align:left">Specify your human genome build: build="hg19" or build="hg38"</td>
</tr>
<tr>
<td style="text-align:center">change.build</td>
<td style="text-align:left">TRUE or FALSE (default). For conversion of your genomic coordinates</td>
</tr>
<tr>
<td style="text-align:center">find.build</td>
<td style="text-align:left">TRUE or FALSE (default). Additional genome build check for the annotation step.</td>
</tr>
<tr>
<td style="text-align:center">snv.cutoff</td>
<td style="text-align:left">Default SMuRF_score cutoff for the SNV model unless a number between 0 and 1 is stated</td>
</tr>
<tr>
<td style="text-align:center">indel.cutoff</td>
<td style="text-align:left">Default SMuRF_score cutoff for the INDEL model unless a number between 0 and 1 is stated</td>
</tr>
<tr>
<td style="text-align:center">re.tabIndex</td>
<td style="text-align:left">TRUE or FALSE (default). Set to TRUE to create tab-indexed (.tbi) files for each vcf</td>
</tr>
<tr>
<td style="text-align:center">check.packages</td>
<td style="text-align:left">TRUE (default) or FALSE. Developer mode</td>
</tr>
</tbody>
</table>
<p>For more information on the parameters see R documentation:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>help<span class="token punctuation">(</span>smurf<span class="token punctuation">)</span>
</code></pre>
<p>Examples:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>library<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span> <span class="token comment">#load SMuRF package</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token string">"/path/to/directory.."</span><span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"snv"</span><span class="token punctuation">,</span> <span class="token comment">#snv only</span>
output.dir<span class="token operator">=</span><span class="token string">"/path/to/output"</span><span class="token punctuation">,</span> <span class="token comment">#saving your output</span>
build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>
<span class="token comment">#Include gene annotations for coding regions in output</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token string">"/path/to/directory.."</span><span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> <span class="token comment">#snv and indel predictions</span>
annotation<span class="token operator">=</span>T<span class="token punctuation">,</span> <span class="token comment">#generate gene annotations</span>
build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a id="input"></a></p>
<h4 id="brrunning-smurf-selecting-the-correct-input-vcfs"><br>Running SMuRF: Selecting the correct input vcfs </h4>
<p><br><em>SMuRF</em> requires 5 caller VCF (vcf.gz) files as input, specified via the "directory" parameter. Provide a path to a directory containing all 5 caller VCF files. Both <strong>caller.vcf.gz</strong> (compressed) and <strong>caller.vcf</strong> are accepted formats.</p>
<p>The tab-indexed (.tbi) files for each caller are required for the parsing step. If the <strong>.tbi</strong> files are missing, set <em>re.tabIndex=T</em> when calling SMuRF to generate these files.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode <span class="token operator">=</span><span class="token string">"snv"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span> build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
re.tabIndex <span class="token operator">=</span> T<span class="token punctuation">)</span> <span class="token comment">#generate .tbi files in directory</span>
<span class="token comment">#"Generating .tbi files in directory..."</span>
<span class="token comment"># Connection successful!</span>
<span class="token comment">#If the vcf files are in different directories:</span>
<span class="token comment">#Specify directories manually</span>
dir.list <span class="token operator">=</span> list<span class="token punctuation">(</span>mutect<span class="token operator">=</span><span class="token string">'/path1/to/mutect.vcf.gz'</span><span class="token punctuation">,</span>
freebayes<span class="token operator">=</span><span class="token string">'/path2/to/freebayes.vcf.gz'</span><span class="token punctuation">,</span>
vardict<span class="token operator">=</span><span class="token string">'/path3/to/vardict.vcf.gz'</span><span class="token punctuation">,</span>
varscan<span class="token operator">=</span><span class="token string">'/path4/to/varscan.vcf.gz'</span><span class="token punctuation">,</span>
strelka<span class="token operator">=</span><span class="token string">'/path5/to/strelka.vcf.gz'</span><span class="token punctuation">)</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span>dir.list<span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>
</code></pre><p><br>In some cases, your input directory may contain other VCF files generated by bcbio, for example germline VCF files, copy-number related files, or VCFs from older versions. A list of file-name keywords to exclude (<em>file.exclude</em>) can be added to make sure that SMuRF selects the correct VCF files.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>list.files<span class="token punctuation">(</span>directory<span class="token punctuation">)</span>
<span class="token comment"># sample1.mutect.vcf.gz</span>
<span class="token comment"># sample1.mutect-germline.vcf.gz #to be excluded</span>
<span class="token comment"># sample1.freebayes.vcf.gz</span>
<span class="token comment"># sample1.vardict.vcf.gz</span>
<span class="token comment"># sample1.varscan.vcf.gz</span>
<span class="token comment"># sample1.varscan-version1.vcf.gz #to be excluded</span>
<span class="token comment"># sample1.strelka.vcf.gz</span>
<span class="token comment"># sample1.strelka-archive.vcf.gz #to be excluded</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span><span class="token string">"/path/to/directory.."</span><span class="token punctuation">,</span>
file.exclude <span class="token operator">=</span> c<span class="token punctuation">(</span><span class="token string">"germline"</span><span class="token punctuation">,</span><span class="token string">"version1"</span><span class="token punctuation">,</span><span class="token string">"archive"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token comment">#keywords in file name to be excluded</span>
mode<span class="token operator">=</span><span class="token string">"snv"</span><span class="token punctuation">,</span>
output.dir<span class="token operator">=</span><span class="token string">"/path/to/output"</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">)</span>
</code></pre><p><br>It is optional to indicate your normal and tumour sample labels. <em>SMuRF</em> detects your normal and tumour sample names in order to generate variant allele frequency information. If this information is missing in your VCF headers, <em>SMuRF</em> will terminate with an error. State your unique tumour sample label using <em>t.label</em>.</p>
<p>Possible normal/tumour sample labels:</p>
<p>sample1-N, sample1-T<br>
<br>sample1_normal, sample1_tumour<br>
<br>sample1.healthy, sample1.cancer</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
t.label <span class="token operator">=</span> <span class="token string">'tumour'</span> <span class="token comment">#optional if labels were detected from vcf headers correctly</span>
<span class="token punctuation">)</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="build" href=""></a></p>
<h4 id="brrunning-smurf-detecting-and-changing-genome-build"><br>Running SMuRF: Detecting and changing genome build </h4>
<p><br> The genome build for your sample must be specified ( <em>build='hg19'</em> or <em>build='hg38'</em> ).</p>
<p>hg19 also refers to the Genome Reference Consortium Human Build 37 (GRCh37)<br>
<br>hg38 also refers to the Genome Reference Consortium Human Build 38 (GRCh38)</p>
<p>The genome build stated in <em>SMuRF</em> will be cross-checked with the build used in your VCF files.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span>
build <span class="token operator">=</span> <span class="token string">'hg38'</span> <span class="token comment">#wrong build stated</span>
<span class="token punctuation">)</span>
<span class="token comment"># "Genome build stated in SMuRF:"</span>
<span class="token comment"># "hg38"</span>
<span class="token comment"># "Ref genome used in vcf:"</span>
<span class="token comment"># "file:///home/projects/13001264/softwares/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa"</span>
<span class="token comment"># "Warning: build provided does not match ref genome used in vcf. SMuRF CDS annotation may not run properly if genome build is incorrect."</span>
<span class="token comment"># "Final genome build used for analysis: hg38"</span>
<span class="token comment"># </span>
<span class="token comment"># Warning message</span>
</code></pre><p><br>If you are unsure of the genome build used in your analysis, specify <em>find.build=T</em>.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span>
build <span class="token operator">=</span> <span class="token string">'hg38'</span><span class="token punctuation">,</span> <span class="token comment">#wrong build stated</span>
find.build <span class="token operator">=</span> T<span class="token punctuation">,</span> <span class="token comment">#if unsure of genome build</span>
<span class="token punctuation">)</span>
<span class="token comment"># "Genome build stated in SMuRF:"</span>
<span class="token comment"># "hg38"</span>
<span class="token comment"># "Ref genome used in vcf:"</span>
<span class="token comment"># "file:///home/projects/13001264/softwares/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa"</span>
<span class="token comment"># "Warning: build provided does not match ref genome used in vcf. SMuRF CDS annotation may not run properly if genome build is incorrect."</span>
<span class="token comment"># "Changing build variable provided"</span>
<span class="token comment"># "hg38 -> hg19"</span>
<span class="token comment"># "Final genome build used for analysis: hg19"</span>
<span class="token comment"># No errors</span>
</code></pre><p><br>Samples from different batches may be aligned to a different genome reference build. In order to standardize your gene annotations and output, specify <em>change.build</em> for genome build conversion.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span>
build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
change.build <span class="token operator">=</span> T<span class="token punctuation">,</span> <span class="token comment">#genome build conversion</span>
<span class="token punctuation">)</span>
<span class="token comment"># "Genome build stated in SMuRF:"</span>
<span class="token comment"># "hg19"</span>
<span class="token comment"># "Ref genome used in vcf:"</span>
<span class="token comment"># "file:///home/projects/13001264/softwares/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa"</span>
<span class="token comment"># "Final genome build used for analysis: hg19"</span>
<span class="token comment"># "Compiling annotations"</span>
<span class="token comment"># "Changing annotations from hg19 to hg38"</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="cutoff" href=""></a></p>
<h4 id="brrunning-smurf-tweaking-smurf-score-cut-off"><br>Running SMuRF: Tweaking SMuRF score cut-off </h4>
<p><br><em>SMuRF</em> v3.0.0 is fine-tuned to achieve the max f1 score in our test set.</p>
<p>Re-adjust the stringency of the prediction with a specific cut-off value.<br>
Use parameters <em>snv.cutoff</em> or <em>indel.cutoff</em> to adjust the thresholds (a higher cut-off provides a smaller set of calls with higher confidence).</p>
<p>To re-adjust the cut-off value of an <strong>existing</strong> SMuRF output, simply set <em>parse.dir</em> to the directory containing the existing snv-parse and indel-parse files for re-processing.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#start with default cutoffs</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span>
snv.cutoff<span class="token operator">=</span><span class="token string">'default'</span><span class="token punctuation">,</span> indel.cutoff<span class="token operator">=</span><span class="token string">'default'</span><span class="token punctuation">,</span>
output.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults'</span><span class="token punctuation">)</span>
<span class="token comment">#modify cutoff from existing SMuRF parse files</span>
myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span>
snv.cutoff<span class="token operator">=</span><span class="token number">0.2</span><span class="token punctuation">,</span> indel.cutoff<span class="token operator">=</span><span class="token number">0.1</span><span class="token punctuation">,</span> <span class="token comment">#specify new cutoffs</span>
parse.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults'</span><span class="token punctuation">,</span> <span class="token comment">#SMuRF path existing parse.txt files</span>
output.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults2'</span><span class="token punctuation">)</span> <span class="token comment">#new output</span>
<span class="token comment">#Plot histogram</span>
hist<span class="token punctuation">(</span>as.numeric<span class="token punctuation">(</span>myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>predicted_indel<span class="token punctuation">[</span><span class="token punctuation">,</span><span class="token string">'SMuRF_score'</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span> main <span class="token operator">=</span> <span class="token string">'Re-adjusted predicted indels'</span><span class="token punctuation">,</span> xlab <span class="token operator">=</span> <span class="token string">'SMuRF_score'</span><span class="token punctuation">,</span> col <span class="token operator">=</span> <span class="token string">'grey50'</span><span class="token punctuation">)</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="output" href=""></a></p>
<h4 id="broutput-format"><br>Output format </h4>
<p>Output files available include:</p>
<ol>
<li>
<p>Parsed-raw file (<em>parse</em>)</p>
</li>
<li>
<p>Predicted positive mutations (<em>predicted</em>)</p>
</li>
<li>
<p>Predicted positive mutations with annotations (<em>annotated</em>)* #for smurf's "cdsannotation" function only</p>
</li>
<li>
<p>Variant statistics (<em>stats</em>)</p>
</li>
<li>
<p>Time taken (<em>time</em>)</p>
</li>
</ol>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code><span class="token comment">#Viewing predicted output in R</span>
myresults<span class="token operator">$</span>smurf_snv<span class="token operator">$</span>predicted_snv
myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>predicted_indel
<span class="token comment">#see column description below</span>
</code></pre><table>
<thead>
<tr>
<th>Column</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>Chr</td>
<td>Chromosome number</td>
</tr>
<tr>
<td>START_POS_REF/END_POS_REF</td>
<td>Start and End nucleotide position of the somatic mutation</td>
</tr>
<tr>
<td>REF/ALT</td>
<td>Consensus Ref and Alt nucleotide changes of the highest likelihood</td>
</tr>
<tr>
<td>REF_MFVdVs/ALT_MFVdVs</td>
<td>Reference and Alternative nucleotide changes from each caller; Mutect2 (M), Freebayes (F), Vardict (Vd), Varscan (Vs) and Strelka2 (not abbreviated to preserve column name)</td>
</tr>
<tr>
<td>FILTER</td>
<td>Pass (TRUE) or Reject (FALSE) [boolean] mutation calls from the individual callers</td>
</tr>
<tr>
<td>Sample_Name</td>
<td>Sample name is extracted based on your labeled samples in the vcf files</td>
</tr>
<tr>
<td>Alt_Allele_Freq</td>
<td>Mean Variant allele frequency calculated from the tumor reads of the callers</td>
</tr>
<tr>
<td>Depth ref/alt N/T</td>
<td>Mean read depth from the N/T sample for ref/alt alleles</td>
</tr>
<tr>
<td>SMuRF_score</td>
<td>SMuRF confidence score of the predicted mutation</td>
</tr>
</tbody>
</table>
<br>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>stats_indel
<span class="token comment"># Passed_Calls</span>
<span class="token comment"># Strelka2 466</span>
<span class="token comment"># Mutect2 232</span>
<span class="token comment"># FreeBayes 306</span>
<span class="token comment"># VarDict 483</span>
<span class="token comment"># VarScan 1273</span>
<span class="token comment"># Atleast1 2431</span>
<span class="token comment"># Atleast2 278</span>
<span class="token comment"># Atleast3 43</span>
<span class="token comment"># Atleast4 7</span>
<span class="token comment"># All5 1</span>
<span class="token comment"># SMuRF_INDEL 88</span>
myresults<span class="token operator">$</span>smurf_snv<span class="token operator">$</span>stats_snv
<span class="token comment"># Passed_Calls</span>
<span class="token comment"># Strelka2 1362</span>
<span class="token comment"># Mutect2 1539</span>
<span class="token comment"># FreeBayes 216</span>
<span class="token comment"># VarDict 239</span>
<span class="token comment"># VarScan 1734</span>
<span class="token comment"># Atleast1 4017</span>
<span class="token comment"># Atleast2 928</span>
<span class="token comment"># Atleast3 60</span>
<span class="token comment"># Atleast4 48</span>
<span class="token comment"># All5 37</span>
<span class="token comment"># SMuRF_SNV 1043</span>
</code></pre><p><br>We added gene annotations using SnpEff (from bcbio) and <em>SMuRF</em> extracts the coding annotations from the canonical transcripts with the highest functional impact. Take note that your vcf.gz files should be tab-indexed (see <a href="#input">Running SMuRF: re.tabIndex</a>).</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>mydir<span class="token punctuation">,</span> <span class="token string">"cdsannotation"</span><span class="token punctuation">)</span> <span class="token comment">#runs SMuRF for SNV and indels + generate annotations</span>
myresults<span class="token operator">$</span>smurf_snv_annotation<span class="token operator">$</span>annotated<span class="token punctuation">[</span>order<span class="token punctuation">(</span>myresults<span class="token operator">$</span>smurf_snv_annotation<span class="token operator">$</span>annotated<span class="token operator">$</span>REGION<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token operator">:</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span><span class="token punctuation">]</span>
<span class="token comment"># Chr START_POS_REF END_POS_REF REF ALT REF_MFVdVs ALT_MFVdVs FILTER_Mutect2 FILTER_Freebayes FILTER_Vardict</span>
<span class="token comment"># 52 1 77806132 77806132 G A G/G/G/G/G A/A/A/A/A TRUE TRUE TRUE</span>
<span class="token comment"># 81 1 170961432 170961432 C T C/NA/NA/NA/C T/NA/NA/NA/T TRUE FALSE FALSE</span>
<span class="token comment"># FILTER_Varscan FILTER_Strelka2 Sample_Name Alt_Allele_Freq N_refDepth N_altDepth T_refDepth T_altDepth Allele</span>
<span class="token comment"># 52 TRUE TRUE icgc_cll_tumour 0.500 14 0 15 15 A</span>
<span class="token comment"># 81 FALSE TRUE icgc_cll_tumour 0.467 33 0 16 14 T</span>
<span class="token comment"># Annotation Impact Gene_name Gene_ID Feature_Type Feature_ID Transcript_BioType Rank HGVS.c</span>
<span class="token comment"># 52 missense_variant MODERATE AK5 ENSG00000154027 transcript ENST00000354567 protein_coding 6/14 c.770G>A</span>
<span class="token comment"># 81 missense_variant MODERATE MROH9 ENSG00000117501 transcript ENST00000367759 protein_coding 12/22 c.1156C>T</span>
<span class="token comment"># HGVS.p cDNA.pos CDS.pos AA.pos Distance REGION SMuRF_score</span>
<span class="token comment"># 52 p.Arg257His 1033/3248 770/1689 257/562 . CDS 0.9083840</span>
<span class="token comment"># 81 p.Arg386Cys 1310/3165 1156/2586 386/861 . CDS 0.8107475</span>
</code></pre><p><br>Time taken for your run:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults<span class="token operator">$</span>time.taken
<span class="token comment"># Time difference of 20.52405 secs</span>
</code></pre><p><br>The raw parsed output:</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults<span class="token operator">$</span>smurf_indel<span class="token operator">$</span>parse_indel
myresults<span class="token operator">$</span>smurf_snv<span class="token operator">$</span>parse_snv
</code></pre><p><br>Indicate the <em>output.dir</em> to save the <em>SMuRF</em> output as tab-delimited .txt files in your targeted directory.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>myresults <span class="token operator">=</span> smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>find.package<span class="token punctuation">(</span><span class="token string">"smurf"</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token string">"/data"</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span>
output.dir <span class="token operator">=</span> <span class="token string">'C:/Users/admin/myresults'</span> <span class="token comment">#path to output directory</span>
<span class="token punctuation">)</span>
</code></pre><p><a href="#home">back to top</a></p>
<p><a name="multiple-samples" href=""></a></p>
<h4 id="brrunning-on-multiple-samples"><br>Running on multiple samples </h4>
<p>Iterate over multiple samples by providing the list of directories of where your sample files are located.</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>project.dir <span class="token operator">=</span> <span class="token string">'path/to/my/dir'</span>
list.files<span class="token punctuation">(</span>project.dir<span class="token punctuation">)</span>
<span class="token comment"># sample_A</span>
<span class="token comment"># sample_B</span>
<span class="token comment"># sample_C</span>
samples <span class="token operator">=</span> c<span class="token punctuation">(</span><span class="token string">'sample_A'</span><span class="token punctuation">,</span> <span class="token string">'sample_B'</span><span class="token punctuation">,</span> <span class="token string">'sample_C'</span><span class="token punctuation">)</span> <span class="token comment">#sample dir where vcf files are located</span>
<span class="token keyword keyword-for">for</span><span class="token punctuation">(</span>i <span class="token keyword keyword-in">in</span> <span class="token number">1</span><span class="token operator">:</span>length<span class="token punctuation">(</span>samples<span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
smurf<span class="token punctuation">(</span>directory<span class="token operator">=</span>paste0<span class="token punctuation">(</span>project.dir<span class="token punctuation">,</span> <span class="token string">'/'</span><span class="token punctuation">,</span> samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode<span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> build<span class="token operator">=</span><span class="token string">'hg19'</span><span class="token punctuation">,</span> annotation <span class="token operator">=</span> T<span class="token punctuation">,</span>
output.dir <span class="token operator">=</span> paste0<span class="token punctuation">(</span><span class="token string">'C:/Users/admin/myresults/'</span><span class="token punctuation">,</span>samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token punctuation">}</span>
</code></pre><p>Running SMuRF on multiple samples on a cluster (parallel multi-core instance)</p>
<pre data-role="codeBlock" data-info="r" class="language-r r"><code>install.packages<span class="token punctuation">(</span><span class="token string">"foreach"</span><span class="token punctuation">)</span>
install.packages<span class="token punctuation">(</span><span class="token string">"doParallel"</span><span class="token punctuation">)</span>
install.packages<span class="token punctuation">(</span><span class="token string">"doSNOW"</span><span class="token punctuation">)</span>
library<span class="token punctuation">(</span>foreach<span class="token punctuation">)</span>
library<span class="token punctuation">(</span>doParallel<span class="token punctuation">)</span>
library<span class="token punctuation">(</span>doSNOW<span class="token punctuation">)</span>
library<span class="token punctuation">(</span>smurf<span class="token punctuation">)</span>
project.dir <span class="token operator">=</span> <span class="token string">'path/to/my/dir'</span>
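samples <span class="token operator">=</span> basename<span class="token punctuation">(</span>Sys.glob<span class="token punctuation">(</span>paste0<span class="token punctuation">(</span>project.dir<span class="token punctuation">,</span><span class="token string">'/*'</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token comment">#sample directory names only; the project path is prepended below</span>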
<span class="token comment">#setup parallel backend to use many processors</span>
cores<span class="token operator">=</span>detectCores<span class="token punctuation">(</span><span class="token punctuation">)</span>
cl <span class="token operator"><-</span> makeCluster<span class="token punctuation">(</span>cores<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token operator">-</span><span class="token number">1</span><span class="token punctuation">)</span> <span class="token comment">#not to overload your computer</span>
registerDoParallel<span class="token punctuation">(</span>cl<span class="token punctuation">)</span>
foreach<span class="token punctuation">(</span>i<span class="token operator">=</span><span class="token number">1</span><span class="token operator">:</span>length<span class="token punctuation">(</span>samples<span class="token punctuation">)</span><span class="token punctuation">,</span> .packages <span class="token operator">=</span> <span class="token string">'smurf'</span><span class="token punctuation">,</span> .verbose <span class="token operator">=</span> F<span class="token punctuation">)</span> <span class="token percent-operator operator">%dopar%</span> <span class="token punctuation">{</span>
print<span class="token punctuation">(</span>i<span class="token punctuation">)</span>
smurf<span class="token punctuation">(</span>directory <span class="token operator">=</span> paste0<span class="token punctuation">(</span>project.dir<span class="token punctuation">,</span> <span class="token string">'/'</span><span class="token punctuation">,</span> samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
mode <span class="token operator">=</span><span class="token string">"combined"</span><span class="token punctuation">,</span> nthreads <span class="token operator">=</span> <span class="token number">1</span><span class="token punctuation">,</span> build <span class="token operator">=</span> <span class="token string">'hg19'</span><span class="token punctuation">,</span>
output.dir <span class="token operator">=</span> paste0<span class="token punctuation">(</span><span class="token string">'C:/Users/admin/myresults/'</span><span class="token punctuation">,</span>samples<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token punctuation">)</span>
<span class="token punctuation">}</span>
stopCluster<span class="token punctuation">(</span>cl<span class="token punctuation">)</span>
h2o.shutdown<span class="token punctuation">(</span><span class="token punctuation">)</span>
</code></pre><br>
To report errors and bugs, please open an issue on our GitHub page.
<p><a href="#home">back to top</a></p>
</div>
</body></html>