SchlossLab
diff --git a/‎README.md
Lines changed: 6 additions & 1 deletion b/‎README.md
Lines changed: 6 additions & 1 deletion
diff --git a/‎Snakefile
Lines changed: 29 additions & 1 deletion b/‎Snakefile
Lines changed: 29 additions & 1 deletion
diff --git a/‎code/combine_hp_perf.R
Lines changed: 6 additions & 0 deletions b/‎code/combine_hp_perf.R
Lines changed: 6 additions & 0 deletions
diff --git a/‎code/plot_hp_perf.R
Lines changed: 8 additions & 0 deletions b/‎code/plot_hp_perf.R
Lines changed: 8 additions & 0 deletions
diff --git a/‎code/preproc.R
Lines changed: 1 addition & 1 deletion b/‎code/preproc.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎config/cluster.json
Lines changed: 3 additions & 0 deletions b/‎config/cluster.json
Lines changed: 3 additions & 0 deletions
diff --git a/‎config/environment.yml
Lines changed: 1 addition & 0 deletions b/‎config/environment.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎figures/benchmarks.png renamed to ‎figures/benchmarks-example.png b/‎figures/benchmarks.png renamed to ‎figures/benchmarks-example.png
diff --git a/‎figures/hp_performance_glmnet-example.png
30.5 KB b/‎figures/hp_performance_glmnet-example.png
30.5 KB
diff --git a/‎figures/hp_performance_rf-example.png
29.5 KB b/‎figures/hp_performance_rf-example.png
29.5 KB
@@ -21,7 +21,8 @@ Snakemake automatically builds a directed acyclic graph (DAG) of jobs to figure
 out the dependencies of each of the rules and what order to run them in.
 This workflow preprocesses the example dataset, calls `mikropml::run_ml()`
 for each seed and ML method set in the config file,
-combines the results files, plots performance results,
+combines the results files, plots performance results 
+(cross-validation and test AUROCs, hyperparameter AUROCs from cross-validation, and benchmark performance),
 and renders a simple [R Markdown report](report.Rmd) as a GitHub-flavored markdown file ([example](report-example.md)).
 
 ![rulegraph](figures/rulegraph.png)
@@ -117,6 +118,10 @@ Here's a small example DAG if we were to use only 2 seeds and 2 ML methods:
     This example report was created by running the workflow on the Great Lakes HPC
     at the University of Michigan with [`config/config_robust.yml`](config/config_robust.yml).
 
+## Out of memory or walltime
+
+If any of your jobs fail because they ran out of memory, you can increase the memory for the given rule in the [`config/cluster.json`](config/cluster.json) file. For example, if the `combine_hp_performance` rule fails, you can increase its memory from 16GB to, say, 24GB. You can also change other Slurm parameters from their defaults in this file (e.g. walltime or number of cores).
+
 ## More resources
 
 - [mikropml docs](http://www.schlosslab.org/mikropml/)
 
@@ -3,6 +3,7 @@ configfile: 'config/config.yml'
 ncores = config['ncores']
 ml_methods = config['ml_methods']
 kfold = config['kfold']
+outcome_colname = config['outcome_colname']
 
 nseeds = config['nseeds']
 start_seed = 100
@@ -22,6 +23,8 @@ rule preprocess_data:
         "log/preprocess_data.txt"
     benchmark:
         "benchmarks/preprocess_data.txt"
+    params:
+        outcome_colname=outcome_colname
     resources:
         ncores=ncores
     script:
@@ -40,7 +43,7 @@ rule run_ml:
     benchmark:
         "benchmarks/runs/run_ml.{method}_{seed}.txt"
     params:
-        outcome_colname=config['outcome_colname'],
+        outcome_colname=outcome_colname,
         method="{method}",
         seed="{seed}",
         kfold=kfold
@@ -62,6 +65,19 @@ rule combine_results:
     script:
         "code/combine_results.R"
 
+# Combine the per-seed model files of a single ML method into one
+# hyperparameter-performance results file (one output per {method}).
+rule combine_hp_performance:
+    input:
+        R='code/combine_hp_perf.R',
+        # {{method}} is escaped so it stays a wildcard; only {seed} is expanded.
+        rds=expand('results/runs/{{method}}_{seed}_model.Rds', seed=seeds)
+    output:
+        rds='results/hp_performance_results_{method}.Rds'
+    log:
+        "log/combine_hp_perf_{method}.txt"
+    benchmark:
+        "benchmarks/combine_hp_perf_{method}.txt"
+    script:
+        "code/combine_hp_perf.R"
+
 rule combine_benchmarks:
     input:
         R='code/combine_benchmarks.R',
@@ -84,6 +100,17 @@ rule plot_performance:
     script:
         "code/plot_perf.R"
 
+# Draw the hyperparameter-performance figure for one ML method from the
+# combined results file produced by combine_hp_performance.
+rule plot_hp_performance:
+    input:
+        R='code/plot_hp_perf.R',
+        rds=rules.combine_hp_performance.output.rds
+    output:
+        plot='figures/hp_performance_{method}.png'
+    log:
+        "log/plot_hp_perf_{method}.txt"
+    script:
+        "code/plot_hp_perf.R"
+
 rule plot_benchmarks:
     input:
         R='code/plot_benchmarks.R',
@@ -100,6 +127,7 @@ rule render_report:
         Rmd='report.Rmd',
         R='code/render.R',
         perf_plot=rules.plot_performance.output.plot,
+        hp_plot=expand(rules.plot_hp_performance.output.plot, method = ml_methods),
         bench_plot=rules.plot_benchmarks.output.plot
     output:
         doc='report.md'
 
@@ -0,0 +1,6 @@
+# Collect the trained models listed in the rule's `rds` input, combine their
+# hyperparameter performance, and write the result to the rule's `rds` output.
+source("code/log_smk.R")
+
+model_paths <- snakemake@input[["rds"]]
+models <- lapply(model_paths, readRDS)
+hp_perf <- mikropml::combine_hp_performance(models)
+# Record which ML method these results belong to (taken from the wildcard).
+hp_perf[["method"]] <- snakemake@wildcards[["method"]]
+saveRDS(hp_perf, file = snakemake@output[["rds"]])
@@ -0,0 +1,8 @@
+# Plot hyperparameter performance for one ML method: one panel per entry in
+# hp_perf$params, arranged in a grid and saved to the rule's `plot` output.
+source("code/log_smk.R")
+
+hp_perf <- readRDS(snakemake@input[["rds"]])
+# Each hyperparameter name and the metric name are turned into symbols and
+# unquoted so plot_hp_performance() receives them as column references.
+hp_plot_list <- lapply(hp_perf$params, function(param) {
+  mikropml::plot_hp_performance(
+    hp_perf$dat,
+    !!rlang::sym(param),
+    !!rlang::sym(hp_perf$metric)
+  ) +
+    ggplot2::theme_classic() +
+    ggplot2::scale_color_brewer(palette = "Dark2") +
+    ggplot2::labs(title = unique(hp_perf$method))
+})
+hp_plot <- cowplot::plot_grid(plotlist = hp_plot_list)
+# Pass the grid explicitly: without `plot =`, ggsave() falls back to
+# ggplot2::last_plot() instead of using hp_plot.
+ggplot2::ggsave(snakemake@output[["plot"]], plot = hp_plot)
@@ -5,6 +5,6 @@ doFuture::registerDoFuture()
 future::plan(future::multicore, workers = snakemake@resources[["ncores"]])
 
 data_raw <- readr::read_csv(snakemake@input[["csv"]])
-data_processed <- preprocess_data(data_raw, outcome_colname = "dx")
+data_processed <- preprocess_data(data_raw, outcome_colname = snakemake@params[['outcome_colname']])
 
 saveRDS(data_processed, file = snakemake@output[["rds"]])
@@ -18,5 +18,8 @@
  "run_ml": {
   "procs": "{resources.ncores}",
   "pmem": "4GB"
+ },
+ "combine_hp_performance": {
+  "pmem": "16GB"
  }
 }
@@ -6,6 +6,7 @@ channels:
   - r
 dependencies:
   - r-base=4
+  - r-cowplot
   - r-doFuture
   - r-foreach
   - r-future
Original file line number	Diff line number	Diff line change
`@@ -18,5 +18,8 @@`
`18`	`18`	`"run_ml": {`
`19`	`19`	`"procs": "{resources.ncores}",`
`20`	`20`	`"pmem": "4GB"`
	`21`	`+ },`
	`22`	`+ "combine_hp_performance": {`
	`23`	`+ "pmem": "16GB"`
`21`	`24`	`}`
`22`	`25`	`}`