SchlossLab
diff --git a/.github/workflows/tests.yml
Lines changed: 11 additions & 11 deletions b/.github/workflows/tests.yml
Lines changed: 11 additions & 11 deletions
diff --git a/.gitignore
Lines changed: 1 addition & 0 deletions b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/README.md
Lines changed: 2 additions & 0 deletions b/README.md
Lines changed: 2 additions & 0 deletions
diff --git a/config/README.md
Lines changed: 8 additions & 5 deletions b/config/README.md
Lines changed: 8 additions & 5 deletions
diff --git a/config/default.yml
Lines changed: 3 additions & 2 deletions b/config/default.yml
Lines changed: 3 additions & 2 deletions
diff --git a/config/robust.yml
Lines changed: 18 additions & 0 deletions b/config/robust.yml
Lines changed: 18 additions & 0 deletions
diff --git a/config/test.yml
Lines changed: 12 additions & 0 deletions b/config/test.yml
Lines changed: 12 additions & 0 deletions
diff --git a/figures/benchmarks-example.png
-33.1 KB b/figures/benchmarks-example.png
-33.1 KB
diff --git a/figures/dag.png
21.3 KB b/figures/dag.png
21.3 KB
diff --git a/figures/example/benchmarks.png
38.1 KB b/figures/example/benchmarks.png
38.1 KB
@@ -8,17 +8,17 @@ on:
 
 
 jobs:
-  Formatting:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - name: Formatting
-        uses: github/super-linter@v4
-        env:
-          VALIDATE_ALL_CODEBASE: false
-          DEFAULT_BRANCH: main
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VALIDATE_SNAKEMAKE_SNAKEFMT: true
+  # Formatting:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Formatting
+  #       uses: github/super-linter@v4
+  #       env:
+  #         VALIDATE_ALL_CODEBASE: false
+  #         DEFAULT_BRANCH: main
+  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  #         VALIDATE_SNAKEMAKE_SNAKEFMT: true
 
   Linting:
     runs-on: ubuntu-latest
 
@@ -6,3 +6,4 @@ results/
 .Rproj.user
 *KLS*
 *_test*
+.vscode/
@@ -30,13 +30,15 @@ combines the results files, plots performance results
 (cross-validation and test AUROCs, hyperparameter AUROCs from cross-validation, and benchmark performance),
 and renders a simple [R Markdown report](report.Rmd) as a GitHub-flavored markdown file ([example](report-example.md)).
 
+<!-- Create the rulegraph with workflow/scripts/rulegraph.sh -->
 ![rulegraph](figures/rulegraph.png)
 
 The DAG shows how calls to `run_ml` can run in parallel if
 snakemake is allowed to run more than one job at a time.
 If we use 100 seeds and 4 ML methods, snakemake would call `run_ml` 400 times.
 Here's a small example DAG if we were to use only 2 seeds and 1 ML method:
 
+<!-- Create the dag with workflow/scripts/dag.sh -->
 ![dag](figures/dag.png)
 
 
 
@@ -35,11 +35,14 @@
 
 1. Edit the configuration file [`config/default.yml`](/config/default.yml).
     - `dataset_csv`: the path to the dataset as a csv file.
-    - `outcome_colname`: column name of the outcomes for the dataset.
-    - `ml_methods`: list of machine learning methods to use. Must be supported by mikropml.
+    - `dataset_name`: a short name to identify the dataset.
+    - `outcome_colname`: column name of the outcomes or classes for the dataset.
+    - `ml_methods`: list of machine learning methods to use. Must be [supported by mikropml or caret](http://www.schlosslab.org/mikropml/articles/introduction.html#the-methods-we-support).
     - `kfold`: k number for k-fold cross validation during model training.
-    - `ncores`: the number of cores to use for preprocessing and for each `mikropml::run_ml()` call. Do not exceed the number of cores you have available.
-    - `nseeds`: the number of different random seeds to use for training models with `mikropml::run_ml()`.
+    - `ncores`: the number of cores to use for `preprocess_data()`, `run_ml()`, and `get_feature_importance()`. Do not exceed the number of cores you have available.
+    - `nseeds`: the number of different random seeds to use for training models with `run_ml()`. This will result in `nseeds` different train/test splits.
+    - `find_feature_importance`: whether to calculate feature importances with permutation tests (`true` or `false`). If `false`, the plot in the report will be blank.
+    - `hyperparams`: override the default model hyperparameters set by mikropml for each ML method (optional). Leave this blank if you'd like to use the defaults. You will have to set these if you wish to use an ML method from caret that we don't officially support.
 
     You can leave these options as-is if you'd like to first make sure the
     workflow runs without error on your machine before using your own dataset
@@ -89,7 +92,7 @@
 1. View the results in `report.md` ([see example here](report-example.md)).
 
     This example report was created by running the workflow on the Great Lakes HPC
-    at the University of Michigan with [`config/robust.yml`](config/robust.yml).
+    at the University of Michigan.
 
 ## Out of memory or walltime
 
 
@@ -4,7 +4,8 @@ outcome_colname: dx
 ml_methods:
  - glmnet
  - rf
- - svmRadial
 kfold: 5
-ncores: 4
+ncores: 8
 nseeds: 10
+find_feature_importance: true
+hyperparams:
@@ -9,3 +9,21 @@ ml_methods:
 kfold: 5
 ncores: 36
 nseeds: 100
+find_feature_importance: false
+hyperparams:
+  - glmnet:
+    - alpha:
+      - 0
+    - lambda:
+      - 0.0001
+      - 0.001
+      - 0.01
+      - 0.1
+      - 1
+      - 10
+  - rf:
+    - mtry:
+      - 42
+      - 83
+      - 166
+
@@ -6,3 +6,15 @@ ml_methods:
 kfold: 2
 ncores: 4
 nseeds: 2
+find_feature_importance: true
+hyperparams:
+  - glmnet:
+    - alpha:
+      - 0
+    - lambda:
+      - 0.0001
+      - 0.001
+      - 0.01
+      - 0.1
+      - 1
+      - 10