Skip to content

FinnishCancerRegistry/vame

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Package vame

vame makes it simpler to define and make use of metadata pertaining to one more variables (e.g. a tabular dataset). It implements the VariableMetadata class, which contains the metadata. The various metadata are accessed using slot functions such as vm@var_description_get.

The VariableMetadata class is intended for storing metadata for which there is "one right way". For instance, a variable has one correct description in text.

See the help page ?vame::VariableMetadata for more information. In particular see the examples.

R-CMD-check

Basic example

# vame::VariableMetadata

# maybe the most important type of metadata: what values are different
# variables allowed to have? here we define value_space objects for that.
value_space_d <- function() 1:3 * 100L
vm <- vame::VariableMetadata(
  var_dt = data.table::data.table(
    var_nm = c("a", "b", "c", "d", "e", "f", "g", "h"),
    type = c("categorical", "categorical",
             "categorical",
             "my_type_1", "my_type_2", "my_type_3", "my_type_4", "my_type_5")
  ),
  var_set_dt = data.table::data.table(
    id = c("ab", "c", "d", "e", "f", "g", "h"),
    var_nm_set = list(
      ab = c("a", "b"),
      c = "c", d = "d", e = "e", f = "f", g = "g", h = "h"),
    value_space = list(
      ab = list(dt = data.table::data.table(
        a = c(1L, 2L, 2L),
        b = c(11L, 21L, 22L)
      )),
      c = list(set = c("a", "b")),
      d = list(expr = quote(value_space_d())),
      e = list(bounds = list(
        lo = 0.0, hi = 10.0,
        lo_inclusive = TRUE, hi_inclusive = TRUE
      )),
      f = list(bounds = list(
        lo = as.Date("1901-01-01"), hi = as.Date("2023-12-31"),
        lo_inclusive = TRUE, hi_inclusive = TRUE
      )),
      g = list(unrestricted = list(class_set = c("IDate", "Date"))),
      h = list(regex = "^[a-z]$")
    )
  )
)

# this information can be used to e.g. verify data.
vm@var_assert(1L, var_nm = "a")
vm@var_assert(21L, var_nm = "b")
vm@var_assert("a", var_nm = "c")
vm@var_assert(100L, var_nm = "d")
vm@var_assert(c(0.0, 10.0), var_nm = "e")
vm@var_assert(as.Date("1901-01-01"), var_nm = "f")
vm@var_assert(data.table::as.IDate("1901-01-01"), var_nm = "g")
vm@var_assert(letters, var_nm = "h")
my_fun <- function(e_values) {
  vm@var_assert(e_values, var_nm = "e")
  e_values + 1
}
my_fun(0.0)

# VariableMetadata objects can be modified after creation.
# lets change some metadata.
vm@var_meta_set(var_nm = "f", meta_nm = "type", value = "my_date")
stopifnot(
  vm@var_meta_get(var_nm = "f", meta_nm = "type") == "my_date"
)
vm@var_set_value_space_set(id = "c", value_space = list(set = c("x", "z")))
stopifnot(
  identical(vm@var_set_value_space_get(id = "c"), list(set = c("x", "z")))
)

# renaming, removing variables
vm <- vame::VariableMetadata(
  var_dt = data.table::data.table(
    var_nm = c("a", "b", "c"),
    flavour = c("tasty", "rancid", "bitter")
  ),
  var_set_dt = data.table::data.table(
    id = "set_01",
    var_nm_set = list(c("a", "b")),
    value_space = list(list(dt = data.table::data.table(
      a = 1:2,
      b = 3:4
    )))
  )
)

vm@var_rename("a", "A")
stopifnot(
  identical(vm@var_meta_get("A", "flavour"), "tasty"),
  identical(names(vm@var_set_value_space_get("set_01")[["dt"]]), c("A", "b")),
  identical(vm@var_set_meta_get("set_01", "var_nm_set"), c("A", "b"))
)

vm@var_set_rename("set_01", "Ab")
stopifnot(
  identical(vm@var_set_meta_get_all("id"), c("Ab" = "Ab"))
)

vm@var_remove("b")
stopifnot(
  identical(names(vm@var_set_value_space_get("Ab")[["dt"]]), "A"),
  identical(vm@var_set_meta_get("Ab", "var_nm_set"), "A")
)

vm@var_set_remove("Ab")
stopifnot(
  identical(length(vm@var_set_meta_get_all("var_nm_set")), 0L)
)

# another important feature is the possibility of retrieving a table of
# allowed categories for one or more variables. this comes up when (a subset
# of) observed data contains only some of the categories (think one age
# group for one sex is not present in data) but we want all possible
# categories to appear in our statistics. if we have defined these
# categories in advance as value_space objects, they are usable in this
# regard as well.
dt_01 <- data.table::CJ(a = 1:3, b = 3:1, c = 1:3)
dt_02 <- data.table::CJ(d = 1:2, e = 2:1)
vm <- vame::VariableMetadata(
  var_dt = data.table::data.table(
    var_nm = c("a", "b", "c", "d", "e", "f"),
    type = "categorical"
  ),
  var_set_dt = data.table::data.table(
    id = c("abc", "de", "f"),
    var_nm_set = list(
      abc = c("a", "b", "c"),
      de = c("d", "e"),
      f = "f"
     ),
    value_space = list(
      abc = list(dt = dt_01),
      de = list(expr = quote({
        dt_02[
          i = !duplicated(dt_02, by = var_nms),
          j = .SD,
          .SDcols = var_nms
        ]
      })),
      f = list(bounds = list(
        lo = 0L, hi = 10L,
        lo_inclusive = TRUE, hi_inclusive = TRUE
      ))
    )
  )
)

stopifnot(
  all.equal(
    vm@vame_category_space_dt(c("a", "b")),
    dt_01[
      i = !duplicated(dt_01, by = c("a", "b")),
      j = .SD,
      .SDcols = c("a", "b")
    ],
    check.attributes = FALSE
  ),
  all.equal(
    vm@vame_category_space_dt(c("d", "e")),
    dt_02,
    check.attributes = FALSE
  ),
  all.equal(
    vm@vame_category_space_dt(c("a", "f")),
    data.table::CJ(a = 1:3, f = 0:10),
    check.attributes = FALSE
  )
)

# getting category space data.tables --- here a variable appears in
# two different value spaces. this can be handy for defining joint value
# spaces and also conversions & aggregations.
dt_01 <- data.table::CJ(a = 1:3, b = 3:1, c = 1:3)
dt_02 <- data.table::CJ(a = 0:1, e = 2:1)
dt_03 <- data.table::data.table(a = 0:3, a_2 = c(1L,1L, 2L,2L))
vm <- vame::VariableMetadata(
  var_dt = data.table::data.table(
    var_nm = c("a", "b", "c", "e", "a_2"),
    type = "categorical"
  ),
  var_set_dt = data.table::data.table(
    id = c("set_01", "set_02", "set_03"),
    var_nm_set = list(c("a", "b", "c"), c("a", "e"), c("a", "a_2")),
    value_space = list(
      list(dt = dt_01),
      list(dt = dt_02),
      list(dt = dt_03)
    )
  )
)

obs <- vm@vame_category_space_dt(c("a", "b", "e"))
exp <- data.table::data.table(
  a = c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
  b = c(NA, NA, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 2L, 3L, 1L, 2L, 3L),
  e = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, NA, NA, NA, NA, NA, NA)
)
data.table::setkeyv(obs, names(obs))
data.table::setkeyv(exp, names(exp))
stopifnot(
  all.equal(obs, exp, check.attributes = FALSE)
)

stopifnot(
  vm@var_is_aggregateable_to("a", "a_2"),
  identical(vm@var_aggregate(0:1, "a", "a_2"), c(1L,1L))
)

# getting labels for variable levels
dt_01 <- data.table::CJ(a = 1:3, b = 3:1, c = 4:5)
vm <- vame::VariableMetadata(
  var_dt = data.table::data.table(
    var_nm = c("a", "b", "c"),
    type = "categorical",
    labeler = list(
      a = data.table::data.table(
        x = 1:3,
        en = paste0("a_level_", 1:3)
      ),
      b = quote({
        dt <- data.table::data.table(
          x = 1:3,
          en = paste0("b_level_", 1:3)
        )
        dt[[label_nm]][match(x, dt[["x"]])]
      }),
      c = NULL
    )
  ),
  var_set_dt = data.table::data.table(
    id = c("set_01"),
    var_nm_set = list(c("a", "b", "c")),
    value_space = list(
      list(dt = dt_01)
    )
  )
)

obs <- vm@var_labels_get(x = 1:4, var_nm = "a", label_nm = "en")
exp <- c(paste0("a_level_", 1:3), NA)
stopifnot(
  identical(obs, exp)
)

obs <- tryCatch(
  vm@var_labels_get(
    x = 1:4,
    var_nm = "a",
    label_nm = "this does not exist"
  ),
  error = function(e) e[["message"]]
)
exp <- paste0(
  "label_nm = \"this does not exist\"",
  " not one of the defined label names: \"en\""
)
stopifnot(
  grepl(exp, obs)
)

obs <- tryCatch(
  vm@var_labels_get(
    x = 1:4,
    var_nm = "c",
    label_nm = "en"
  ),
  error = function(e) e[["message"]]
)
exp <- "Variable \"c\" has no labeler defined"
stopifnot(
  grepl(exp, obs)
)

# adding data to a pre-existing VariableMetadata object
vm_1 <- vame::VariableMetadata(
  var_dt = data.table::data.table(
    var_nm = c("a", "b"),
    type = "categorical",
    labeler = list(
      a = data.table::data.table(x = 1:2, label = c("a_1", "a_2")),
      b = NULL
    )
  ),
  var_set_dt = data.table::data.table(
    id = "ab",
    var_nm_set = list(ab = c("a", "b")),
    value_space = list(ab = list(dt = data.table::CJ(a = 1:2, b = 3:4)))
  )
)
# note that vm_2 var_dt does not have columns "type", "labeler" --- those
# will be NA / NULL for "c" and "d".
vm_2 <- vame::VariableMetadata(
  var_dt = data.table::data.table(var_nm = c("c", "d")),
  var_set_dt = data.table::data.table(
    id = "cd",
    var_nm_set = list(cd = c("c", "d")),
    value_space = list(cd = list(dt = data.table::CJ(c = 5:6, d = 7:8)))
  )
)
vm_1@vame_union_append(vm_2)
stopifnot(
  c("ab", "cd") %in% vm_1@var_set_meta_get_all("id")
)

# taking a copy of a VariableMetadata object
vm_3 <- vm_2@vame_copy()
vm_2@var_rename("d", "dd")
stopifnot(
  "d" %in% vm_3@var_meta_get_all("var_nm"),
  !"d" %in% vm_2@var_meta_get_all("var_nm"),
  "dd" %in% vm_2@var_meta_get_all("var_nm")
)

Recommended installation

devtools::install_github(
  "FinnishCancerRegistry/vame@release"
)

Packages

No packages published

Languages