176176# ' @details Renal Cell Carcinoma: Kidney.
177177# ' @details Supratentorial Primitive Neuroectodermal Tumor: Brain.
178178# ' @seealso \url{https://github.com/ZJUFanLab/scCATCH}
179- # ' @import Seurat stats Matrix
179+ # ' @import Seurat stats progress
180+ # ' @importFrom Matrix rowMeans
180181# ' @export findmarkergenes
181182
182- findmarkergenes <- function (object , species = NULL , cluster = " All" , match_CellMatch = FALSE , cancer = NULL ,
183- tissue = NULL , cell_min_pct = 0.25 , logfc = 0.25 , pvalue = 0.05 ) {
183+ findmarkergenes <- function (object , species = NULL , cluster = " All" , match_CellMatch = FALSE ,
184+ cancer = NULL , tissue = NULL , cell_min_pct = 0.25 , logfc = 0.25 , pvalue = 0.05 ) {
184185 # extract normalized data from Seurat object
185186 ndata <- object [[" RNA" ]]@ data
186187 # extract cluster information of all single cells
@@ -195,17 +196,23 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
195196 clu_num2 <- clu_num2 [order(clu_num2 )]
196197 clu_num <- c(clu_num1 , clu_num2 )
197198 rm(object )
198- cat(" Note: the raw data matrix includes" , ncol(ndata ), " cells and" , nrow(ndata ), " genes." , " \n " )
199+ cat(" Note: the raw data matrix includes" , ncol(ndata ), " cells and" , nrow(ndata ), " genes." ,
200+ " \n " )
201+
202+ if (length(clu_num ) == 1 ) {
203+ stop(" There is only one cluster, please do clustering analysis or select more clusters for Seurat object!" )
204+ }
205+
199206 Sys.sleep(2 )
200207 clu_num1 <- clu_num
201208 if (is.null(species )) {
202- cat(' \n ' )
209+ cat(" \n " )
203210 stop(" Please define the species! 'Human' or 'Mouse'." )
204211 }
205212 geneinfo <- geneinfo [geneinfo $ species == species , ]
206213 # revise gene symbol
207- cat(' \n ' )
208- cat(" ---Revising gene symbols according to NCBI Gene symbols (updated in Jan. 10 , 2020, https://www.ncbi.nlm.nih.gov/gene) and no matched genes and duplicated genes will be removed." ,
214+ cat(" \n " )
215+ cat(" ---Revising gene symbols according to NCBI Gene symbols (updated in June 19 , 2020, https://www.ncbi.nlm.nih.gov/gene) and no matched genes and duplicated genes will be removed." ,
209216 " \n " )
210217 Sys.sleep(2 )
211218 genename <- rownames(ndata )
@@ -226,28 +233,29 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
226233 genedata1 <- as.data.frame(table(genedata $ new_name ), stringsAsFactors = F )
227234 genedata1 <- genedata1 [genedata1 $ Freq == 1 , ]
228235 genedata <- genedata [genedata $ new_name %in% genedata1 $ Var1 , ]
229- ndata <- ndata [genedata $ raw_name ,]
236+ ndata <- ndata [genedata $ raw_name , ]
230237 rownames(ndata ) <- genedata $ new_name
231- cat(' \n ' )
232- cat(" Note: the new data matrix includes" , ncol(ndata ), " cells and" , nrow(ndata ), " genes." , " \n " )
238+ cat(" \n " )
239+ cat(" Note: the new data matrix includes" , ncol(ndata ), " cells and" , nrow(ndata ), " genes." ,
240+ " \n " )
233241 Sys.sleep(2 )
234242 if (match_CellMatch ) {
235243 ndata3 <- ndata
236244 # tissue without cancer
237245 if (is.null(cancer )) {
238- CellMatch <- CellMatch [CellMatch $ speciesType == species & CellMatch $ cancerType == " Normal " ,
239- ]
246+ CellMatch <- CellMatch [CellMatch $ speciesType == species & CellMatch $ cancerType ==
247+ " Normal " , ]
240248 # check tissue
241249 if (is.null(tissue )) {
242- cat(' \n ' )
250+ cat(" \n " )
243251 stop(" Please define the origin tissue of cells! Select one or more related tissue types." )
244252 }
245253 tissue_match <- NULL
246254 # check the tissue
247255 tissue_match <- tissue %in% CellMatch $ tissueType
248256 tissue_match <- which(tissue_match == " FALSE" )
249257 if (length(tissue_match ) > 0 ) {
250- cat(' \n ' )
258+ cat(" \n " )
251259 stop(paste(tissue [tissue_match ], " , not matched with the tissue types in CellMatch database! Please select one or more related tissue types." ,
252260 sep = " " ))
253261 }
@@ -260,12 +268,12 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
260268 }
261269 }
262270 if (length(cellmarker_num ) < 100 ) {
263- cat(' \n ' )
271+ cat(" \n " )
264272 cat(paste(" Warning: there are only " , length(cellmarker_num ), " potential marker genes in CellMatch database for " ,
265273 species , " on " , tissue1 , " !" , sep = " " ), " \n " )
266274 }
267275 if (length(cellmarker_num ) > = 100 ) {
268- cat(' \n ' )
276+ cat(" \n " )
269277 cat(paste(" Note: there are " , length(cellmarker_num ), " potential marker genes in CellMatch database for " ,
270278 species , " on " , tissue1 , " ." , sep = " " ), " \n " )
271279 }
@@ -280,22 +288,22 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
280288 cancer_match <- cancer %in% CellMatch $ cancerType
281289 cancer_match <- which(cancer_match == " FALSE" )
282290 if (length(cancer_match ) > 0 ) {
283- cat(' \n ' )
291+ cat(" \n " )
284292 stop(paste(cancer [cancer_match ], " , not matched with the cancer types in CellMatch database! Please select one or more related cancer types." ,
285293 sep = " " ))
286294 }
287295 CellMatch <- CellMatch [CellMatch $ cancerType %in% cancer , ]
288296 # check tissue
289297 if (is.null(tissue )) {
290- cat(' \n ' )
298+ cat(" \n " )
291299 stop(" Please define the origin tissue of cells! Select one or more related tissue types." )
292300 }
293301 tissue_match <- NULL
294302 # check the tissue
295303 tissue_match <- tissue %in% CellMatch $ tissueType
296304 tissue_match <- which(tissue_match == " FALSE" )
297305 if (length(tissue_match ) > 0 ) {
298- cat(' \n ' )
306+ cat(" \n " )
299307 stop(paste(tissue [tissue_match ], " , not matched with the tissue types in CellMatch database! Please select one or more related tissue types." ,
300308 sep = " " ))
301309 }
@@ -314,20 +322,20 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
314322 }
315323 }
316324 if (length(cellmarker_num ) < 100 ) {
317- cat(' \n ' )
325+ cat(" \n " )
318326 cat(paste(" Warning: there are only " , length(cellmarker_num ), " potential marker genes in CellMatch database for " ,
319327 species , " " , cancer1 , " on " , tissue1 , " !" , sep = " " ), " \n " )
320328 }
321329 if (length(cellmarker_num ) > = 100 ) {
322- cat(' \n ' )
330+ cat(" \n " )
323331 cat(paste(" Note: there are " , length(cellmarker_num ), " potential marker genes in CellMatch database for " ,
324332 species , " " , cancer1 , " on " , tissue1 , " ." , sep = " " ), " \n " )
325333 }
326334 ndata <- ndata [rownames(ndata ) %in% cellmarker_num , ]
327335 }
328336 }
329337 if (nrow(ndata ) == 0 ) {
330- cat(' \n ' )
338+ cat(" \n " )
331339 stop(" There is no matched potential marker genes in the matrix! Please try again to find differential expressed genes by setting match_CellMatch as FALSE!" )
332340 }
333341 # generating pair-wise clusters
@@ -341,26 +349,32 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
341349 cluster_match <- cluster %in% clu_num
342350 cluster_match <- which(cluster_match == " FALSE" )
343351 if (length(cluster_match ) > 0 ) {
344- cat(' \n ' )
352+ cat(" \n " )
345353 stop(paste(cluster [cluster_match ], " , not matched with the cell clusters! Please select one or more related clusters." ,
346354 sep = " " ))
347355 }
348356 clu_pair <- clu_pair [clu_pair $ cluster1 %in% cluster , ]
349357 clu_num1 <- clu_num [clu_num %in% cluster ]
350358 }
359+ cat(" Note: There are" , length(clu_num ), " clusters in Seurat object." , " \n " )
360+ Sys.sleep(2 )
361+ cat(" Preparing to find potential marker genes for" , cluster , " cluster(s)." , " \n " )
351362 # generating result file
352363 clu_marker <- NULL
353364 Sys.sleep(2 )
354- cat(' \n ' )
355365 # calculate the p value and log fold change
356366 for (i in 1 : length(clu_num1 )) {
357367 cat(paste(" Finding potential marker genes for cluster" , clu_num1 [i ], sep = " " ), " \n " )
358- clu_pair1 <- clu_pair [clu_pair $ cluster1 == clu_num [i ], ]
368+ clu_pair1 <- clu_pair [clu_pair $ cluster1 == clu_num1 [i ], ]
359369 # generating result file for each cluster
360370 clu_marker1 <- NULL
371+ pb <- progress_bar $ new(format = " [:bar] Finished::percent Remaining::eta" , total = nrow(clu_pair1 ) *
372+ nrow(ndata ), clear = FALSE , width = 60 , complete = " +" , incomplete = " -" )
373+
361374 for (j in 1 : nrow(clu_pair1 )) {
362375 clu_marker2 <- as.data.frame(matrix (data = 0 , nrow = nrow(ndata ), ncol = 6 ))
363- colnames(clu_marker2 ) <- c(" cluster" , " gene" , " pct" , " comp_cluster" , " avg_logfc" , " pvalue" )
376+ colnames(clu_marker2 ) <- c(" cluster" , " gene" , " pct" , " comp_cluster" , " avg_logfc" ,
377+ " pvalue" )
364378 clu_marker2 [, 2 ] <- rownames(ndata )
365379 # extract normalized data of pair wise clusters
366380 ndata1 <- ndata [, clu_info [clu_info $ cluster == clu_pair1 $ cluster1 [j ], ]$ cell ]
@@ -379,6 +393,7 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
379393 genedata2 <- as.numeric(ndata2 [k , ])
380394 clu_marker_pvalue [k ] <- stats :: wilcox.test(genedata1 , genedata2 , )$ p.value
381395 }
396+ pb $ tick()
382397 }
383398 clu_marker2 $ pct <- clu_marker_pct
384399 clu_marker2 $ pvalue <- clu_marker_pvalue
@@ -392,21 +407,22 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
392407 d1 <- as.data.frame(table(clu_marker1 $ gene ), stringsAsFactors = F )
393408 d1 <- d1 [d1 $ Freq == (length(clu_num ) - 1 ), ]
394409 if (nrow(d1 ) > 1 ) {
395- clu_marker1 <- clu_marker1 [clu_marker1 $ gene %in% d1 $ Var1 , ]
396- clu_marker_gene <- unique(clu_marker1 $ gene )
397- clu_marker_gene <- clu_marker_gene [order(clu_marker_gene )]
398- avg_logfc <- NULL
399- for (j in 1 : length(clu_marker_gene )) {
400- clu_marker_avg_logfc <- clu_marker1 [clu_marker1 $ gene == clu_marker_gene [j ], ]
401- avg_logfc [j ] <- mean(clu_marker_avg_logfc $ avg_logfc )
402- }
403- clu_marker1 <- unique(clu_marker1 [, c(" cluster" , " gene" , " pct" )])
404- clu_marker1 <- clu_marker1 [order(clu_marker1 $ gene ), ]
405- clu_marker1 $ avg_logfc <- avg_logfc
406- clu_marker <- rbind(clu_marker , clu_marker1 )
410+ clu_marker1 <- clu_marker1 [clu_marker1 $ gene %in% d1 $ Var1 , ]
411+ clu_marker_gene <- unique(clu_marker1 $ gene )
412+ clu_marker_gene <- clu_marker_gene [order(clu_marker_gene )]
413+ avg_logfc <- NULL
414+ for (j in 1 : length(clu_marker_gene )) {
415+ clu_marker_avg_logfc <- clu_marker1 [clu_marker1 $ gene == clu_marker_gene [j ],
416+ ]
417+ avg_logfc [j ] <- mean(clu_marker_avg_logfc $ avg_logfc )
418+ }
419+ clu_marker1 <- unique(clu_marker1 [, c(" cluster" , " gene" , " pct" )])
420+ clu_marker1 <- clu_marker1 [order(clu_marker1 $ gene ), ]
421+ clu_marker1 $ avg_logfc <- avg_logfc
422+ clu_marker <- rbind(clu_marker , clu_marker1 )
407423 }
408424 }
409- cat(" *** Done*** " , " \n " )
425+ cat(" --- Done--- " , " \n " )
410426 Sys.sleep(2 )
411427 }
412428 rm(ndata1 , ndata2 )
@@ -422,13 +438,13 @@ findmarkergenes <- function(object, species = NULL, cluster = "All", match_CellM
422438 res [[2 ]] <- " NA"
423439 names(res ) <- c(" new_data_matrix" , " clu_markers" )
424440 if (is.null(clu_marker )) {
425- cat(' \n ' )
441+ cat(" \n " )
426442 cat(" Warning: there is no potential marker genes found in the matrix! You may adjust the cell clusters by applying other clustering algorithm and try again." )
427443 return (res )
428444 stop()
429445 }
430446 if (nrow(clu_marker ) == 0 ) {
431- cat(' \n ' )
447+ cat(" \n " )
432448 cat(" Warning: there is no potential marker genes found in the matrix! You may adjust the cell clusters by applying other clustering algorithm and try again." )
433449 return (res )
434450 stop()
0 commit comments