-
Notifications
You must be signed in to change notification settings - Fork 0
/
6-output_signature_normality.R
37 lines (30 loc) · 1.05 KB
/
6-output_signature_normality.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# This script writes a CSV file containing the skewness and Shapiro-Wilk normality statistics for each drug signature
library(tidyverse)
library(broom)
library(e1071)
# Paths (directory prefix included) of the files found in the signatures
# directory, so that each one can be read directly.
files <- list.files("data/signatures/group/", full.names = TRUE)

# Column types applied when reading each signature file: the gene symbol is
# parsed as character and the log differential expression as double.
col_spec <- cols(
  Name_GeneSymbol = col_character(),
  Value_LogDiffExp = col_double()
)
# Compute normality statistics for one signature table.
#
# Runs the Shapiro-Wilk test and computes the skewness of the
# `Value_LogDiffExp` column, and extracts the cell-line label from the
# signature file name.
#
# Arguments:
#   df   - data frame with a numeric `Value_LogDiffExp` column.
#   name - signature file name (without directory), expected to look like
#          "<group>-<a>-<b>-<c>-signature.tsv".
#
# Returns a list with the elements `w` (Shapiro-Wilk W statistic), `pval`
# (its p-value), `line` (the "<a>-<b>-<c>" part of `name`, or NA when `name`
# does not match the expected pattern) and `skewness`. The three numeric
# values are rounded to 3 decimal places.
output_statistics <- function(df, name) {
  # The dot before "tsv" is escaped so it only matches a literal ".".
  pat <- "([a-z]+)-(\\w+-\\w+-\\w+)-signature\\.tsv"
  # Column 1 of the match matrix is the full match, column 2 the group
  # prefix and column 3 the cell-line part; only the latter is needed.
  cell_line <- str_match(name, pat)[, 3]

  # shapiro.test() silently discards missing values whereas skewness()
  # returns NA as soon as one is present. Drop them once, up front, so that
  # both statistics describe the same sample.
  values <- df$Value_LogDiffExp
  values <- values[!is.na(values)]

  normal <- shapiro.test(values)
  skew <- skewness(values)

  list(
    # The statistic comes back as a named vector (name "W"); strip the name
    # so the list holds plain scalars.
    w = round(unname(normal$statistic), 3),
    pval = round(normal$p.value, 3),
    line = cell_line,
    skewness = round(skew, 3)
  )
}
# Read every signature file, compute one row of normality statistics per
# file, and write the combined table to disk. write_csv() hands back the
# table it was given, so `output` ends up holding the written table.
output <- files %>%
  map(function(path) read_tsv(path, col_types = col_spec)) %>%
  # Pair each table with its bare file name; the rows are bound together.
  map2_dfr(basename(files), output_statistics) %>%
  # Fix the column order and apply the final header names in one step.
  select(
    CellLine = line,
    Skewness = skewness,
    W = w,
    `P-Value` = pval
  ) %>%
  write_csv("results/cell-line_normality_statistics.csv")