Skip to content

feat: added get-rm-stats #77

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .azure/prepare.bash
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ mkdir -p "${R_LIBS_USER}"

Rscript -e "install.packages(c('git2r', 'covr', 'withr', 'devtools', 'lintr', 'mockery'), repos='https://cloud.r-project.org', lib='${R_LIBS_USER}')"
Rscript -e "install.packages(c('DSI', 'metafor', 'meta'), repos='https://cloud.r-project.org', lib='${R_LIBS_USER}')"
Rscript -e "withr::with_libpaths(new = '${R_LIBS_USER}', devtools::install_github('datashield/dsBaseClient', ref='6.1.0'))"
Rscript -e "withr::with_libpaths(new = '${R_LIBS_USER}', devtools::install_github('datashield/dsBaseClient', ref='v6.2-dev'))"
cd "${BUILD_REPOSITORY_LOCALPATH}"
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: dsHelper
Type: Package
Title: Helper Functions for Use with 'DataSHIELD'
Version: 0.1.2.9000
Version: 1.0.0.9000
Description: Often we need to automate things with 'DataSHIELD'. These functions help to do that.
Authors@R:
c(person(given= "Tim",
Expand All @@ -22,7 +22,7 @@ Maintainer: Tim Cadman <t.cadman@bristol.ac.uk>
Depends:
R (>= 3.5.0)
Imports:
dsBaseClient (>= 6.1.0),
dsBaseClient (>= 6.2.0),
dplyr (>= 1.0.2),
tibble,
purrr,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(dh.defineCases)
export(dh.dropCols)
export(dh.findVarsIndex)
export(dh.getAnonPlotData)
export(dh.getRmStats)
export(dh.getStats)
export(dh.lmTab)
export(dh.lmeMultPoly)
Expand Down Expand Up @@ -55,6 +56,7 @@ importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(dplyr,vars)
importFrom(dsBaseClient,ds.Boole)
importFrom(dsBaseClient,ds.asFactorSimple)
importFrom(dsBaseClient,ds.asNumeric)
importFrom(dsBaseClient,ds.assign)
importFrom(dsBaseClient,ds.cbind)
Expand All @@ -79,6 +81,9 @@ importFrom(dsBaseClient,ds.reShape)
importFrom(dsBaseClient,ds.replaceNA)
importFrom(dsBaseClient,ds.rm)
importFrom(dsBaseClient,ds.scatterPlot)
importFrom(dsBaseClient,ds.summary)
importFrom(dsBaseClient,ds.tapply)
importFrom(dsBaseClient,ds.tapply.assign)
importFrom(magrittr,"%<>%")
importFrom(metafor,rma)
importFrom(purrr,cross2)
Expand Down
136 changes: 136 additions & 0 deletions R/get-rm-stats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#' Produces descriptive statistics based on repeated measures data
#' which it would be useful to report in papers.
#'
#' The function creates the server-side objects "id_fact", "id_summary" and
#' "id_summary_num" on each connection as intermediate steps; they are not
#' removed afterwards.
#'
#' @importFrom dplyr %>% mutate across
#' @importFrom dsBaseClient ds.summary ds.asFactorSimple ds.tapply.assign ds.tapply
#' @importFrom dsBaseClient ds.asNumeric
#'
#' @param df name of a datashield dataframe (character string)
#' @param outcome name of outcome variable in df
#' @param id_var name of id variable in df
#' @param age_var name of age variable in df
#' @param conns connection object for DataSHIELD backends. If NULL, the
#' connections are looked up with DSI::datashield.connections_find()
#'
#' @return a tibble containing the following columns:
#'
#' cohort: name of the cohort the row refers to
#' min_age: 5th percentile of age
#' max_age: 95th percentile of age
#' n_obs: number of valid (non-missing) values of the outcome
#' n_participants: total number of unique participants
#' n_meas_med: median number of measurements per individual
#' n_meas_5: 5th percentile of measurements per individual
#' n_meas_95: 95th percentile of measurements per individual
#'
#' @export
dh.getRmStats <- function(df = NULL, outcome = NULL, id_var = NULL, age_var = NULL, conns = NULL) {
  # Bind names used via non-standard evaluation so R CMD check does not flag them
  . <- n_meas_5 <- n_meas_95 <- n_meas_med <- variable <- perc_5 <- perc_95 <- cohort <- min_age <-
    max_age <- valid_n <- NULL

  if (is.null(df)) {
    stop("Please provide the name of a datashield dataframe")
  }

  if (is.null(outcome)) {
    stop("Please provide the name of your outcome variable")
  }

  if (is.null(id_var)) {
    stop("Please provide the name of id variable in df")
  }

  if (is.null(age_var)) {
    stop("Please provide the name of your age variable in df")
  }

  if (is.null(conns)) {
    # Namespace-qualified, consistent with the DSI:: calls further down
    conns <- DSI::datashield.connections_find()
  }

  ## ---- First get overall stats for some of the easy ones -------------------------------------------
  stats <- dh.getStats(
    df = df,
    vars = c(outcome, age_var),
    conns = conns
  )

  ## ---- Age range of participants -------------------------------------------------------------------
  age_ranges <- stats$continuous %>%
    dplyr::filter(variable == age_var) %>%
    mutate(
      min_age = perc_5,
      max_age = perc_95
    ) %>%
    dplyr::select(cohort, min_age, max_age)

  ## ---- Total number of outcome measurements -------------------------------------
  outcome_n <- stats$continuous %>%
    dplyr::filter(variable == outcome) %>%
    dplyr::select(cohort, n_obs = valid_n)


  ## ---- Total number of unique participants ----------------------------------------

  # First, we use ds.tapply.assign to summarise the number of observations for each
  # subject. The length of this created object then gives us the number of subjects.

  ds.asFactorSimple(paste0(df, "$", id_var), "id_fact", datasources = conns)

  # Count outcome measurements per id. The variable is built from the `df` and
  # `outcome` arguments rather than being hard-coded to a specific dataset.
  ds.tapply.assign(
    X.name = paste0(df, "$", outcome),
    INDEX.names = "id_fact",
    FUN.name = "N",
    newobj = "id_summary",
    datasources = conns
  )

  n_subjects <- DSI::datashield.aggregate(conns, call("lengthDS", "id_summary$N")) %>%
    setNames(names(conns)) %>%
    bind_rows() %>%
    mutate(combined = rowSums(.)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "cohort",
      values_to = "n_participants"
    )

  ## ---- Median number of outcome measurements per participant ----------------------------------

  # We can use the quantileMeanDS server function with the object we created above to get
  # the median (and 5th/95th percentiles) of the number of measurements per participant.

  ds.asNumeric("id_summary$N", "id_summary_num", datasources = conns)

  quants <- DSI::datashield.aggregate(conns, as.symbol("quantileMeanDS(id_summary_num)"))

  weight_med_iqr <- quants %>%
    bind_rows(.id = "cohort") %>%
    select(cohort, "5%", "50%", "95%") %>%
    rename(n_meas_med = "50%", n_meas_5 = "5%", n_meas_95 = "95%")

  ## Get the combined version using weighted sum
  lengths <- DSI::datashield.aggregate(conns, call("lengthDS", "id_summary_num"))
  numNAs <- DSI::datashield.aggregate(conns, "numNaDS(id_summary_num)")

  # Non-missing count per cohort = length minus number of NAs
  valid_n <- list(lengths, numNAs) %>% pmap(~ .x - .y)

  # Each cohort's share of the total non-missing count
  weights <- unlist(valid_n) / sum(unlist(valid_n))

  weighted_quant <- list(quants, weights) %>% pmap(~ .x * .y)

  # Sum the weighted quantiles across cohorts, one quantile at a time
  sum_quant <- weighted_quant %>%
    pmap(function(...) {
      sum(c(...))
    }) %>%
    bind_rows() %>%
    rename(n_meas_med = "50%", n_meas_5 = "5%", n_meas_95 = "95%") %>%
    mutate(cohort = "combined") %>%
    select(cohort, n_meas_med, n_meas_5, n_meas_95)

  quant_out <- bind_rows(weight_med_iqr, sum_quant)

  ## ---- Create final output -------------------------------------------------------------------
  out <- left_join(age_ranges, outcome_n, by = "cohort") %>%
    left_join(., n_subjects, by = "cohort") %>%
    left_join(., quant_out, by = "cohort")

  return(out)
}
2 changes: 1 addition & 1 deletion R/tidy-env.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ dh.tidyEnv <- function(obj = NULL, type = c("remove", "keep"), conns = NULL) {
bind_rows()

vars_tibble %>% pmap(function(cohort, value) {
ds.rm(x.name = value, datasources = conns[cohort])
ds.rm(x.names = value, datasources = conns[cohort])
})
}
}
41 changes: 41 additions & 0 deletions man/dh.getRmStats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.