Skip to content

Commit e4d1785

Browse files
committed
v2.1 update
1 parent 5637658 commit e4d1785

File tree

6 files changed

+162
-95
lines changed

6 files changed

+162
-95
lines changed

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: scCATCH
22
Type: Package
33
Title: Single Cell Cluster-Based Annotation Toolkit for Cellular Heterogeneity
4-
Version: 2.0
5-
Depends: R (>= 3.5.0)
4+
Version: 2.1
5+
Depends: R (>= 3.6.0)
66
Author: Xin Shao
77
Maintainer: Xin Shao<xin_shao@zju.edu.cn>
88
Description: This package is an automatic cluster-based annotation pipeline based on evidence-based score by matching the marker genes with known cell markers in tissue-specific cell taxonomy reference database.
@@ -14,6 +14,7 @@ Imports:
1414
Seurat (>= 3.0.0),
1515
stats,
1616
data.table,
17+
progress,
1718
Matrix
1819
Suggests:
1920
knitr,

NAMESPACE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
export(findmarkergenes)
44
export(scCATCH)
5-
import(Matrix)
65
import(Seurat)
76
import(data.table)
7+
import(progress)
88
import(stats)
9+
importFrom(Matrix,rowMeans)

R/findmarkergenes.R

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,12 @@
176176
#' @details Renal Cell Carcinoma: Kidney.
177177
#' @details Supratentorial Primitive Neuroectodermal Tumor: Brain.
178178
#' @seealso \url{https://github.com/ZJUFanLab/scCATCH}
179-
#' @import Seurat stats Matrix
179+
#' @import Seurat stats progress
180+
#' @importFrom Matrix rowMeans
180181
#' @export findmarkergenes
181182

182-
findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellMatch = FALSE, cancer = NULL,
183-
tissue = NULL, cell_min_pct = 0.25, logfc = 0.25, pvalue = 0.05) {
183+
findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellMatch = FALSE,
184+
cancer = NULL, tissue = NULL, cell_min_pct = 0.25, logfc = 0.25, pvalue = 0.05) {
184185
# extract normalized data from Seurat object
185186
ndata <- object[["RNA"]]@data
186187
# extract cluster information of all single cells
@@ -195,17 +196,23 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
195196
clu_num2 <- clu_num2[order(clu_num2)]
196197
clu_num <- c(clu_num1, clu_num2)
197198
rm(object)
198-
cat("Note: the raw data matrix includes", ncol(ndata), "cells and", nrow(ndata), "genes.", "\n")
199+
cat("Note: the raw data matrix includes", ncol(ndata), "cells and", nrow(ndata), "genes.",
200+
"\n")
201+
202+
if (length(clu_num) == 1) {
203+
stop("There is only one cluster, please do clustering analysis or select more clusters for Seurat object!")
204+
}
205+
199206
Sys.sleep(2)
200207
clu_num1 <- clu_num
201208
if (is.null(species)) {
202-
cat('\n')
209+
cat("\n")
203210
stop("Please define the species! 'Human' or 'Mouse'.")
204211
}
205212
geneinfo <- geneinfo[geneinfo$species == species, ]
206213
# revise gene symbol
207-
cat('\n')
208-
cat("---Revising gene symbols according to NCBI Gene symbols (updated in Jan. 10, 2020, https://www.ncbi.nlm.nih.gov/gene) and no matched genes and duplicated genes will be removed.",
214+
cat("\n")
215+
cat("---Revising gene symbols according to NCBI Gene symbols (updated in June 19, 2020, https://www.ncbi.nlm.nih.gov/gene) and no matched genes and duplicated genes will be removed.",
209216
"\n")
210217
Sys.sleep(2)
211218
genename <- rownames(ndata)
@@ -226,28 +233,29 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
226233
genedata1 <- as.data.frame(table(genedata$new_name), stringsAsFactors = F)
227234
genedata1 <- genedata1[genedata1$Freq == 1, ]
228235
genedata <- genedata[genedata$new_name %in% genedata1$Var1, ]
229-
ndata <- ndata[genedata$raw_name,]
236+
ndata <- ndata[genedata$raw_name, ]
230237
rownames(ndata) <- genedata$new_name
231-
cat('\n')
232-
cat("Note: the new data matrix includes", ncol(ndata), "cells and", nrow(ndata), "genes.", "\n")
238+
cat("\n")
239+
cat("Note: the new data matrix includes", ncol(ndata), "cells and", nrow(ndata), "genes.",
240+
"\n")
233241
Sys.sleep(2)
234242
if (match_CellMatch) {
235243
ndata3 <- ndata
236244
# tissue without cancer
237245
if (is.null(cancer)) {
238-
CellMatch <- CellMatch[CellMatch$speciesType == species & CellMatch$cancerType == "Normal",
239-
]
246+
CellMatch <- CellMatch[CellMatch$speciesType == species & CellMatch$cancerType ==
247+
"Normal", ]
240248
# check tissue
241249
if (is.null(tissue)) {
242-
cat('\n')
250+
cat("\n")
243251
stop("Please define the origin tissue of cells! Select one or more related tissue types.")
244252
}
245253
tissue_match <- NULL
246254
# check the tissue
247255
tissue_match <- tissue %in% CellMatch$tissueType
248256
tissue_match <- which(tissue_match == "FALSE")
249257
if (length(tissue_match) > 0) {
250-
cat('\n')
258+
cat("\n")
251259
stop(paste(tissue[tissue_match], ", not matched with the tissue types in CellMatch database! Please select one or more related tissue types.",
252260
sep = ""))
253261
}
@@ -260,12 +268,12 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
260268
}
261269
}
262270
if (length(cellmarker_num) < 100) {
263-
cat('\n')
271+
cat("\n")
264272
cat(paste("Warning: there are only ", length(cellmarker_num), " potential marker genes in CellMatch database for ",
265273
species, " on ", tissue1, "!", sep = ""), "\n")
266274
}
267275
if (length(cellmarker_num) >= 100) {
268-
cat('\n')
276+
cat("\n")
269277
cat(paste("Note: there are ", length(cellmarker_num), " potential marker genes in CellMatch database for ",
270278
species, " on ", tissue1, ".", sep = ""), "\n")
271279
}
@@ -280,22 +288,22 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
280288
cancer_match <- cancer %in% CellMatch$cancerType
281289
cancer_match <- which(cancer_match == "FALSE")
282290
if (length(cancer_match) > 0) {
283-
cat('\n')
291+
cat("\n")
284292
stop(paste(cancer[cancer_match], ", not matched with the cancer types in CellMatch database! Please select one or more related cancer types.",
285293
sep = ""))
286294
}
287295
CellMatch <- CellMatch[CellMatch$cancerType %in% cancer, ]
288296
# check tissue
289297
if (is.null(tissue)) {
290-
cat('\n')
298+
cat("\n")
291299
stop("Please define the origin tissue of cells! Select one or more related tissue types.")
292300
}
293301
tissue_match <- NULL
294302
# check the tissue
295303
tissue_match <- tissue %in% CellMatch$tissueType
296304
tissue_match <- which(tissue_match == "FALSE")
297305
if (length(tissue_match) > 0) {
298-
cat('\n')
306+
cat("\n")
299307
stop(paste(tissue[tissue_match], ", not matched with the tissue types in CellMatch database! Please select one or more related tissue types.",
300308
sep = ""))
301309
}
@@ -314,20 +322,20 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
314322
}
315323
}
316324
if (length(cellmarker_num) < 100) {
317-
cat('\n')
325+
cat("\n")
318326
cat(paste("Warning: there are only ", length(cellmarker_num), " potential marker genes in CellMatch database for ",
319327
species, " ", cancer1, " on ", tissue1, "!", sep = ""), "\n")
320328
}
321329
if (length(cellmarker_num) >= 100) {
322-
cat('\n')
330+
cat("\n")
323331
cat(paste("Note: there are ", length(cellmarker_num), " potential marker genes in CellMatch database for ",
324332
species, " ", cancer1, " on ", tissue1, ".", sep = ""), "\n")
325333
}
326334
ndata <- ndata[rownames(ndata) %in% cellmarker_num, ]
327335
}
328336
}
329337
if (nrow(ndata) == 0) {
330-
cat('\n')
338+
cat("\n")
331339
stop("There is no matched potential marker genes in the matrix! Please try again to find differential expressed genes by setting match_CellMatch as FALSE!")
332340
}
333341
# generating pair-wise clusters
@@ -341,26 +349,32 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
341349
cluster_match <- cluster %in% clu_num
342350
cluster_match <- which(cluster_match == "FALSE")
343351
if (length(cluster_match) > 0) {
344-
cat('\n')
352+
cat("\n")
345353
stop(paste(cluster[cluster_match], ", not matched with the cell clusters! Please select one or more related clusters.",
346354
sep = ""))
347355
}
348356
clu_pair <- clu_pair[clu_pair$cluster1 %in% cluster, ]
349357
clu_num1 <- clu_num[clu_num %in% cluster]
350358
}
359+
cat("Note: There are", length(clu_num), "clusters in Seurat object.", "\n")
360+
Sys.sleep(2)
361+
cat("Preparing to find potential marker genes for", cluster, "cluster(s).", "\n")
351362
# generating result file
352363
clu_marker <- NULL
353364
Sys.sleep(2)
354-
cat('\n')
355365
# calculate the p value and log fold change
356366
for (i in 1:length(clu_num1)) {
357367
cat(paste("Finding potential marker genes for cluster", clu_num1[i], sep = " "), "\n")
358-
clu_pair1 <- clu_pair[clu_pair$cluster1 == clu_num[i], ]
368+
clu_pair1 <- clu_pair[clu_pair$cluster1 == clu_num1[i], ]
359369
# generating result file for each cluster
360370
clu_marker1 <- NULL
371+
pb <- progress_bar$new(format = "[:bar] Finished::percent Remaining::eta", total = nrow(clu_pair1) *
372+
nrow(ndata), clear = FALSE, width = 60, complete = "+", incomplete = "-")
373+
361374
for (j in 1:nrow(clu_pair1)) {
362375
clu_marker2 <- as.data.frame(matrix(data = 0, nrow = nrow(ndata), ncol = 6))
363-
colnames(clu_marker2) <- c("cluster", "gene", "pct", "comp_cluster", "avg_logfc", "pvalue")
376+
colnames(clu_marker2) <- c("cluster", "gene", "pct", "comp_cluster", "avg_logfc",
377+
"pvalue")
364378
clu_marker2[, 2] <- rownames(ndata)
365379
# extract normalized data of pair wise clusters
366380
ndata1 <- ndata[, clu_info[clu_info$cluster == clu_pair1$cluster1[j], ]$cell]
@@ -379,6 +393,7 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
379393
genedata2 <- as.numeric(ndata2[k, ])
380394
clu_marker_pvalue[k] <- stats::wilcox.test(genedata1, genedata2, )$p.value
381395
}
396+
pb$tick()
382397
}
383398
clu_marker2$pct <- clu_marker_pct
384399
clu_marker2$pvalue <- clu_marker_pvalue
@@ -392,21 +407,22 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
392407
d1 <- as.data.frame(table(clu_marker1$gene), stringsAsFactors = F)
393408
d1 <- d1[d1$Freq == (length(clu_num) - 1), ]
394409
if (nrow(d1) > 1) {
395-
clu_marker1 <- clu_marker1[clu_marker1$gene %in% d1$Var1, ]
396-
clu_marker_gene <- unique(clu_marker1$gene)
397-
clu_marker_gene <- clu_marker_gene[order(clu_marker_gene)]
398-
avg_logfc <- NULL
399-
for (j in 1:length(clu_marker_gene)) {
400-
clu_marker_avg_logfc <- clu_marker1[clu_marker1$gene == clu_marker_gene[j], ]
401-
avg_logfc[j] <- mean(clu_marker_avg_logfc$avg_logfc)
402-
}
403-
clu_marker1 <- unique(clu_marker1[, c("cluster", "gene", "pct")])
404-
clu_marker1 <- clu_marker1[order(clu_marker1$gene), ]
405-
clu_marker1$avg_logfc <- avg_logfc
406-
clu_marker <- rbind(clu_marker, clu_marker1)
410+
clu_marker1 <- clu_marker1[clu_marker1$gene %in% d1$Var1, ]
411+
clu_marker_gene <- unique(clu_marker1$gene)
412+
clu_marker_gene <- clu_marker_gene[order(clu_marker_gene)]
413+
avg_logfc <- NULL
414+
for (j in 1:length(clu_marker_gene)) {
415+
clu_marker_avg_logfc <- clu_marker1[clu_marker1$gene == clu_marker_gene[j],
416+
]
417+
avg_logfc[j] <- mean(clu_marker_avg_logfc$avg_logfc)
418+
}
419+
clu_marker1 <- unique(clu_marker1[, c("cluster", "gene", "pct")])
420+
clu_marker1 <- clu_marker1[order(clu_marker1$gene), ]
421+
clu_marker1$avg_logfc <- avg_logfc
422+
clu_marker <- rbind(clu_marker, clu_marker1)
407423
}
408424
}
409-
cat("***Done***", "\n")
425+
cat("---Done---", "\n")
410426
Sys.sleep(2)
411427
}
412428
rm(ndata1, ndata2)
@@ -422,13 +438,13 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
422438
res[[2]] <- "NA"
423439
names(res) <- c("new_data_matrix", "clu_markers")
424440
if (is.null(clu_marker)) {
425-
cat('\n')
441+
cat("\n")
426442
cat("Warning: there is no potential marker genes found in the matrix! You may adjust the cell clusters by applying other clustering algorithm and try again.")
427443
return(res)
428444
stop()
429445
}
430446
if (nrow(clu_marker) == 0) {
431-
cat('\n')
447+
cat("\n")
432448
cat("Warning: there is no potential marker genes found in the matrix! You may adjust the cell clusters by applying other clustering algorithm and try again.")
433449
return(res)
434450
stop()

0 commit comments

Comments
 (0)