-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsample_check.R
More file actions
35 lines (24 loc) · 14.9 KB
/
sample_check.R
File metadata and controls
35 lines (24 loc) · 14.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#### This script is for analysis in R AFTER COUNT GENERATION AND CREATING A METADATA FILE
#Sample quality check
# load required raw count data and metadata
metadata <- readRDS(here::here("samples.metadata.RDS"))
reads_count <- readRDS(file = here::here("read.counts.RDS"))
samples.metadata.clean <- readRDS(here::here("samples.metadata.RDS"))
# Create a DGEList object
counts <- DGEList(reads_count, group = samples.metadata$Status)
# check the number of genes with no expression in all samples
table(rowSums(counts$counts==0)==45)
# Filtering non-expressed and lowly-expressed genes
keep.exprs <- filterByExpr(counts, group=samples.metadata$Status)
filtered.counts <- counts[keep.exprs,, keep.lib.sizes=FALSE]
# obtain logCPM unnormalized for plotting purposes
# Here, the norm.factors value is 1 for all samples
logcpm.unnorm.counts <- cpm(filtered.counts, log = TRUE, prior.count = 2, normalized.lib.sizes = TRUE)
# Normalize for composition bias using TMM
filtered.counts <- calcNormFactors(filtered.counts, method = 'TMM')
# Convert counts per million per gene to log counts per million for further downstream analysis.
logcpm.norm.counts <- cpm(filtered.counts, log = TRUE, prior.count = 2, normalized.lib.sizes = TRUE)
# save outputs for later analysis
saveRDS(filtered.counts, here::here("filtered.counts.RDS"))
saveRDS(logcpm.unnorm.counts, here::here("logcpm.unnorm.counts.RDS"))
saveRDS(logcpm.norm.counts, here::here("logcpm.norm.counts.RDS"))