Skip to content

Commit 5d3251e

Browse files
v0.3.0 release candidate 1
1 parent 8b4778f commit 5d3251e

35 files changed

+357
-862
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: metacoder
22
Title: Tools for Parsing, Manipulating, and Graphing Taxonomic Abundance Data
3-
Version: 0.2.1.9012
3+
Version: 0.3.0
44
Authors@R: c(person("Zachary", "Foster", email =
55
"zacharyfoster1989@gmail.com", role = c("aut", "cre")),
66
person("Niklaus", "Grunwald", email =
@@ -59,7 +59,7 @@ Suggests:
5959
zlibbioc
6060
VignetteBuilder: knitr
6161
RoxygenNote: 6.1.0
62-
Date: 2018-05-01
62+
Date: 2018-08-27
6363
Encoding: UTF-8
6464
biocViews:
6565
LinkingTo: Rcpp

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,5 @@ importFrom(dplyr,num_range)
6363
importFrom(dplyr,one_of)
6464
importFrom(dplyr,starts_with)
6565
importFrom(magrittr,"%>%")
66+
importFrom(rlang,.data)
6667
useDynLib(metacoder)

NEWS.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# News
22

3-
## Current
3+
## metacoder 0.3.0
44

55
### Bug fixes
66

@@ -13,7 +13,7 @@
1313

1414
* Added `more_than` option to `calc_n_samples` so that users can set the minimum threshold for whether a sample is counted or not instead of it always being 1.
1515
* Added `calc_prop_samples` function for calculating the proportion of samples with a value greater than 0 (issue [#233](https://github.com/grunwaldlab/metacoder/issues/233)).
16-
* primersearch is faster and takes less memory by using `ape::DNAbin` objects internally.
16+
* `primersearch` is faster and takes less memory by using `ape::DNAbin` objects internally.
1717
* Made `calc_taxon_abund` about 5x faster.
1818

1919
### New features
@@ -24,7 +24,8 @@
2424
### Changes
2525

2626
* `primersearch` now takes and returns a `taxmap` object with results added as tables. `primersearch_raw` is a new function that behaves like the old `primersearch` did, returning a table.
27-
* The `dataset` option of many functions has been renamed to `data` to match the option name in the `taxa` pacakge.
27+
* The `dataset` option of many functions has been renamed to `data` to match the option name in the `taxa` package.
28+
* Numerous spelling fixes.
2829

2930
## metacoder 0.2.1
3031

R/calculations.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ calc_group_median <- function(obj, data, groups, cols = NULL,
182182
#'
183183
#' For a given table in a \code{\link[taxa]{taxmap}} object, apply a function to
184184
#' rows in groups of columns. The result of the function is used to create new
185-
#' columns. This is eqivalant to splitting columns of a table by a factor and
185+
#' columns. This is equivalent to splitting columns of a table by a factor and
186186
#' using \code{apply} on each group.
187187
#'
188188
#' @inheritParams do_calc_on_num_cols
@@ -604,7 +604,7 @@ compare_groups <- function(obj, data, cols, groups,
604604
# Check groups option
605605
groups <- check_option_groups(groups, cols)
606606

607-
# Define defualt function
607+
# Define default function
608608
if (is.null(func)) {
609609
func <- function(abund_1, abund_2) {
610610
median_1 <- stats::median(abund_1, na.rm = TRUE)
@@ -971,7 +971,7 @@ calc_prop_samples <- function(obj, data, cols = NULL, groups = "n_samples",
971971
#'
972972
#' For a given table in a \code{\link[taxa]{taxmap}} object, apply a function to
973973
#' rows in groups of columns. The result of the function is used to create new
974-
#' columns. This is eqivalant to splitting columns of a table by a factor and
974+
#' columns. This is equivalent to splitting columns of a table by a factor and
975975
#' using \code{apply} on each group.
976976
#'
977977
#' @inheritParams do_calc_on_num_cols

R/heat_tree.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ heat_tree.Taxmap <- function(.input, ...) {
5757
#'
5858
#' Plots the distribution of values associated with a taxonomic classification/hierarchy.
5959
#' Taxonomic classifications can have multiple roots, resulting in multiple trees on the same plot.
60-
#' A tree consists of elements, element protperties, conditions, and mapping properties which are
60+
#' A tree consists of elements, element properties, conditions, and mapping properties which are
6161
#' represented as parameters in the heat_tree object.
6262
#' The elements (e.g. nodes, edges, labels, and individual trees) are the infrastructure of the heat tree.
6363
#' The element properties (e.g. size and color) are characteristics that are manipulated by various
@@ -139,7 +139,7 @@ heat_tree.Taxmap <- function(.input, ...) {
139139
#' Default: \code{"area"}.
140140
#'
141141
#' @param node_size_range See details on ranges.
142-
#' Defualt: Optimize to balance overlaps and range size.
142+
#' Default: Optimize to balance overlaps and range size.
143143
#' @param edge_size_range See details on ranges.
144144
#' Default: relative to node size range.
145145
# #' @param tree_size_range See details on ranges.
@@ -220,7 +220,7 @@ heat_tree.Taxmap <- function(.input, ...) {
220220
#' Default: Do not save plot.
221221
#'
222222
#' @param aspect_ratio The aspect_ratio of the plot.
223-
#' @param repel_labels If \code{TRUE} (Defualt), use the ggrepel package to spread out labels.
223+
#' @param repel_labels If \code{TRUE} (Default), use the ggrepel package to spread out labels.
224224
#' @param repel_force The force with which overlapping labels will be repelled from each other.
225225
#' @param repel_iter The number of iterations used when repelling labels
226226
#' @param verbose If \code{TRUE} print progress reports as the function runs.
@@ -290,7 +290,7 @@ heat_tree.Taxmap <- function(.input, ...) {
290290
#' @section ranges:
291291
#'
292292
#' The displayed range of colors and sizes can be explicitly defined or automatically generated.
293-
#' When explicitely used, the size range will proportionately increase/decrease the size of a particular element.
293+
#' When explicitly used, the size range will proportionately increase/decrease the size of a particular element.
294294
#' Size ranges are specified by supplying a \code{numeric} vector with two values: the minimum and maximum.
295295
#' The units used should be between 0 and 1, representing the proportion of a dimension of the graph.
296296
#' Since the dimensions of the graph are determined by layout, and not always square, the value
@@ -325,7 +325,7 @@ heat_tree.Taxmap <- function(.input, ...) {
325325
#'
326326
#' This is the minimum and maximum of values displayed on the legend scales.
327327
#' Intervals are specified by supplying a \code{numeric} vector with two values: the minimum and maximum.
328-
#' When explicitely used, the <element>_<property>_interval will redefine the way the actual conditional values are being represented
328+
#' When explicitly used, the <element>_<property>_interval will redefine the way the actual conditional values are being represented
329329
#' by setting a limit for the <element>_<property>.
330330
#' Any condition below the minimum <element>_<property>_interval will be graphically represented the same as a condition AT the
331331
#' minimum value in the full range of conditional values. Any value above the maximum <element>_<property>_interval will be graphically

R/heat_tree_matrix.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' Plot a matrix of heat trees
22
#'
3-
#' Plot a matrix of heat trees for showing parwise comparisons. A larger,
3+
#' Plot a matrix of heat trees for showing pairwise comparisons. A larger,
44
#' labelled tree serves as a key for the matrix of smaller unlabelled trees. The
55
#' data for this function is typically created with \code{\link{compare_groups}},
66
#'

R/option_parsers.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ verify_taxmap <- function(obj) {
2020
#' Get a data set from a taxmap object
2121
#'
2222
#' NOTE: This will be replaced by the function `get_dataset` in the `taxa`
23-
#' pacakge. Get a data set from a taxmap object and complain if it does not
23+
#' package. Get a data set from a taxmap object and complain if it does not
2424
#' exist. This is intended to be used to parse options in other functions.
2525
#'
2626
#' @param obj A taxmap object

R/parsers.R

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ parse_unite_general <- function(input = NULL, file = NULL, include_seqs = TRUE)
490490
#' @param include_seqs (\code{logical} of length 1) If \code{TRUE}, include
491491
#' sequences in the output object.
492492
#' @param add_species (\code{logical} of length 1) If \code{TRUE}, add the
493-
#' species information to the taxonomy. In this databse, the species name
493+
#' species information to the taxonomy. In this database, the species name
494494
#' often contains other information as well.
495495
#'
496496
#' @return \code{\link{taxmap}}
@@ -787,3 +787,52 @@ parse_ubiome <- function(file = NULL, table = NULL) {
787787
return(output)
788788
}
789789

790+
791+
#' Convert a table with an edge list to taxmap
792+
#'
793+
#' Converts a table containing an edge list into a [taxa::taxmap()] object.
794+
#' An "edge list" is two columns in a table, where each row defines a taxon-supertaxon relationship.
795+
#' The contents of the edge list will be used as taxon IDs.
796+
#' The whole table will be included as a data set in the output object.
797+
#'
798+
#' @param input A table containing an edge list encoded by two columns.
799+
#' @param taxon_id The name/index of the column containing the taxon IDs.
800+
#' @param supertaxon_id The name/index of the column containing the taxon IDs for the supertaxon of the IDs in `taxon_id`.
801+
#'
802+
#' @family parsers
803+
#'
804+
#' @keywords internal
805+
parse_edge_list <- function(input, taxon_id, supertaxon_id, taxon_name, taxon_rank = NULL) {
806+
807+
# Create empty taxmap object
808+
output <- taxmap()
809+
810+
# Make taxon ID characters
811+
input[taxon_id] <- as.character(input[[taxon_id]])
812+
input[supertaxon_id] <- as.character(input[[supertaxon_id]])
813+
814+
# Add edge list
815+
output$edge_list <- data.frame(from = input[[supertaxon_id]],
816+
to = input[[taxon_id]],
817+
stringsAsFactors = FALSE)
818+
819+
# Add taxa
820+
output$taxa <- lapply(seq_len(nrow(input)), function(i) {
821+
my_name <- input[[taxon_name]][i]
822+
if (is.null(taxon_rank)) {
823+
my_rank <- NULL
824+
} else {
825+
my_rank <- input[[taxon_rank]][i]
826+
}
827+
my_id <- input[[taxon_id]][i]
828+
taxon(name = my_name, rank = my_rank, id = my_id)
829+
})
830+
names(output$taxa) <- input[[taxon_id]]
831+
832+
# Add data
833+
input <- dplyr::mutate(input, taxon_id = taxon_ids(output))
834+
input <- dplyr::select(input, taxon_id, everything())
835+
output$data <- list(input = input)
836+
837+
return(output)
838+
}

R/primersearch.R

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ primersearch_raw <- function(input = NULL, file = NULL, forward, reverse, mismat
345345
#'
346346
#' A pair of primers are aligned against a set of sequences. A
347347
#' \code{\link[taxa]{taxmap}} object with two tables is returned: a table with
348-
#' information for each predicited amplicon, quality of match, and predicted
348+
#' information for each predicted amplicon, quality of match, and predicted
349349
#' amplicons, and a table with per-taxon amplification statistics. Requires the
350350
#' EMBOSS tool kit (\url{http://emboss.sourceforge.net/}) to be installed.
351351
#'
@@ -389,6 +389,8 @@ primersearch_raw <- function(input = NULL, file = NULL, forward, reverse, mismat
389389
#' an external variable (i.e. not in \code{obj$data}), it must be named by
390390
#' taxon IDs or have the same length as the number of taxa in \code{obj}.
391391
#' Currently, only character vectors are accepted.
392+
#' @param clone If \code{TRUE}, make a copy of the input object and add on the results (like most R
393+
#' functions). If \code{FALSE}, the input object itself will be modified rather than copied, which uses less RAM.
392394
#' @inheritParams primersearch_raw
393395
#'
394396
#' @return A copy of the input \code{\link[taxa]{taxmap}} object with two tables added. One table contains amplicon information with one row per predicted amplicon with the following info:
@@ -469,11 +471,39 @@ primersearch_raw <- function(input = NULL, file = NULL, forward, reverse, mismat
469471
#'
470472
#' @examples
471473
#' \dontrun{
474+
#' # Get example FASTA file
475+
#' fasta_path <- system.file(file.path("extdata", "silva_subset.fa"),
476+
#' package = "metacoder")
477+
#'
478+
#' # Parse the FASTA file as a taxmap object
479+
#' obj <- parse_silva_fasta(file = fasta_path)
472480
#'
481+
#' # Simulate PCR with primersearch
482+
#' # Have to replace Us with Ts in sequences since primersearch
483+
#' # does not understand Us.
484+
#' obj <- primersearch(obj,
485+
#' gsub(silva_seq, pattern = "U", replace = "T"),
486+
#' forward = c("U519F" = "CAGYMGCCRCGGKAAHACC"),
487+
#' reverse = c("Arch806R" = "GGACTACNSGGGTMTCTAAT"),
488+
#' mismatch = 10)
489+
#'
490+
#' # Plot what did not amplify
491+
#' obj %>%
492+
#' filter_taxa(prop_amplified < 1) %>%
493+
#' heat_tree(node_label = taxon_names,
494+
#' node_color = prop_amplified,
495+
#' node_color_range = c("grey", "red", "purple", "green"),
496+
#' node_color_trans = "linear",
497+
#' node_color_axis_label = "Proportion amplified",
498+
#' node_size = n_obs,
499+
#' node_size_axis_label = "Number of sequences",
500+
#' layout = "da",
501+
#' initial_layout = "re")
473502
#' }
474503
#'
504+
#' @importFrom rlang .data
475505
#' @export
476-
primersearch <- function(obj, seqs, forward, reverse, mismatch = 5) {
506+
primersearch <- function(obj, seqs, forward, reverse, mismatch = 5, clone = TRUE) {
477507
# Non-standard argument evaluation
478508
data_used <- eval(substitute(obj$data_used(seqs)))
479509
sequences <- lazyeval::lazy_eval(lazyeval::lazy(seqs), data = data_used)
@@ -503,49 +533,54 @@ primersearch <- function(obj, seqs, forward, reverse, mismatch = 5) {
503533
}
504534

505535
# Make copy of input object to construct output
506-
output <- obj
536+
if (clone) {
537+
output <- obj$clone(deep = TRUE)
538+
} else {
539+
output <- obj
540+
}
507541

508542
# Run primer search
509543
if ("amplicons" %in% names(output$data)) {
510544
warning(call. = FALSE,
511545
'The existing dataset "amplicons" will be overwritten.')
512546
}
513547
output$data$amplicons <- primersearch_raw(input = sequences, forward = forward,
514-
reverse = reverse, mismatch = mismatch) %>%
515-
dplyr::mutate(taxon_id = names(sequences)[input]) %>%
516-
dplyr::rename(seq_index = input) %>%
548+
reverse = reverse, mismatch = mismatch) %>%
549+
dplyr::mutate(taxon_id = names(sequences)[.data$input]) %>%
550+
dplyr::rename(seq_index = .data$input) %>%
517551
dplyr::select(taxon_id , everything())
518552

519553
# Make per-taxon table
520554
if ("tax_amp_stats" %in% names(output$data)) {
521555
warning(call. = FALSE,
522556
'The existing dataset "tax_amp_stats" will be overwritten.')
523557
}
524-
output$data$tax_amp_stats <- dplyr::tibble(taxon_id = obj$taxon_ids(),
525-
query_count = n_obs(output, sequences),
526-
seq_count = vapply(obs(output, data = "amplicons"),
558+
output$data$tax_amp_stats <- dplyr::tibble("taxon_id" = output$taxon_ids(),
559+
"query_count" = vapply(output$obs(sequences), length, numeric(1)),
560+
"seq_count" = vapply(output$obs("amplicons"),
527561
FUN.VALUE = numeric(1),
528562
FUN = function(i) length(unique(output$data$amplicons$seq_index[i]))),
529-
amp_count = n_obs(output, "amplicons"),
530-
amplified = amp_count > 0)
563+
"amp_count" = vapply(output$obs("amplicons"), length, numeric(1)))
564+
output$data$tax_amp_stats$amplified <- output$data$tax_amp_stats$amp_count > 0
531565

532566
# Check for multiple amplicons per sequence
533567
amp_per_seq_data <- output$data$amplicons %>%
534-
dplyr::group_by(seq_index) %>%
568+
dplyr::group_by(.data$seq_index) %>%
535569
dplyr::count() %>%
536-
dplyr::mutate(taxon_id = names(sequences)[seq_index], multiple = n > 1)
570+
dplyr::mutate(taxon_id = names(sequences)[.data$seq_index],
571+
multiple = .data$n > 1)
537572
output$data$tax_amp_stats$multiple <- unlist(output$obs_apply(amp_per_seq_data, function(i) any(amp_per_seq_data$multiple)))
538573

539574
# Calculate proportion amplified
540-
output$mutate_obs("tax_amp_stats", prop_amplified = seq_count / query_count)
575+
output$data$tax_amp_stats$prop_amplified <- output$data$tax_amp_stats$seq_count / output$data$tax_amp_stats$query_count
541576

542577
# Calculate amplicon length stats
543578
output$mutate_obs("tax_amp_stats",
544579
med_amp_len = unlist(output$obs_apply("amplicons", value = "amplicon", func = function(s) {
545580
if (length(s) == 0) {
546581
return(NA_real_)
547582
} else {
548-
return(median(nchar(s)))
583+
return(stats::median(nchar(s)))
549584
}
550585
})),
551586
min_amp_len = unlist(output$obs_apply("amplicons", value = "amplicon", func = function(s) {
@@ -566,7 +601,7 @@ primersearch <- function(obj, seqs, forward, reverse, mismatch = 5) {
566601
if (length(s) == 0) {
567602
return(NA_real_)
568603
} else {
569-
return(median(nchar(s)))
604+
return(stats::median(nchar(s)))
570605
}
571606
})),
572607
min_prod_len = unlist(output$obs_apply("amplicons", value = "product", func = function(s) {

R/writers.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#' Write an imitation of the Greengenes databse
1+
#' Write an imitation of the Greengenes database
22
#'
33
#' Attempts to save taxonomic and sequence information of a taxmap object in the
44
#' Greengenes output format. If the taxmap object was created using
@@ -84,7 +84,7 @@ write_greengenes <- function(obj, tax_file = NULL, seq_file = NULL,
8484
}
8585

8686

87-
#' Write an imitation of the RDP FASTA databse
87+
#' Write an imitation of the RDP FASTA database
8888
#'
8989
#' Attempts to save taxonomic and sequence information of a taxmap object in the
9090
#' RDP FASTA format. If the taxmap object was created using
@@ -216,7 +216,7 @@ write_mothur_taxonomy <- function(obj, file,
216216
}
217217

218218

219-
#' Write an imitation of the UNITE general FASTA databse
219+
#' Write an imitation of the UNITE general FASTA database
220220
#'
221221
#' Attempts to save taxonomic and sequence information of a taxmap object in the
222222
#' UNITE general FASTA format. If the taxmap object was created using
@@ -285,7 +285,7 @@ write_unite_general <- function(obj, file,
285285
writeLines(seq_content, file)
286286
}
287287

288-
#' Write an imitation of the SILVA FASTA databse
288+
#' Write an imitation of the SILVA FASTA database
289289
#'
290290
#' Attempts to save taxonomic and sequence information of a taxmap object in the
291291
#' SILVA FASTA format. If the taxmap object was created using

0 commit comments

Comments
 (0)