|
| 1 | +#### |
| 2 | +# Version: 0.10.2 |
| 3 | +# Author: Christian Jaeger (christian.jaeger@uk-halle.de) |
| 4 | +# 20250312 |
| 5 | +#### |
| 6 | + |
| 7 | +## Main Changes: |
| 8 | +# 0.10.2 Added Export Button |
| 9 | +################ |
| 10 | + |
| 11 | +## Beschreibung: |
| 12 | +# Diese Shiny-App ermöglicht das Hochladen und Zusammenführen von zwei CSV- oder TXT-Dateien. |
| 13 | +# Falls die Dateien keine Spaltennamen enthalten, können diese aus einer zugehörigen SQL-Dump-Datei extrahiert werden. |
| 14 | +# Der Nutzer kann auswählen, welche Spalten als ID für den Merge-Prozess verwendet werden sollen. |
| 15 | +# Zudem kann zwischen verschiedenen Join-Typen (Inner, Outer, Left, Right) gewählt werden. |
| 16 | +# Das zusammengeführte Ergebnis wird als interaktive Tabelle angezeigt und kann als CSV-Datei gespeichert werden. |
| 17 | +# Optional kann der Nutzer einen eigenen Dateinamen für den Export vergeben. |
| 18 | +################ |
| 19 | + |
| 20 | +library(shiny) |
| 21 | +library(readr) |
| 22 | +library(writexl) |
| 23 | +library(stringr) |
| 24 | +library(DT) |
| 25 | +library(shinyjs) |
| 26 | + |
# Extract the column names of the first table defined in a MySQL-style SQL dump.
#
# The dump is scanned for the first line containing "CREATE TABLE"; the column
# definitions are the lines between it and the first following line that
# contains ") ENGINE".
#
# Args:
#   sql_file: Path to the SQL dump file.
#
# Returns:
#   A character vector of column names in definition order (possibly empty),
#   or NULL when no "CREATE TABLE" statement or no closing ") ENGINE" line
#   is found.
extract_column_names <- function(sql_file) {
  sql_text <- readLines(sql_file, warn = FALSE)

  table_start <- grep("CREATE TABLE", sql_text, fixed = TRUE)
  if (length(table_start) == 0) return(NULL)

  # A dump may define several tables; only the first one is used. Dropping the
  # leading lines with a negative index is safe even when CREATE TABLE is the
  # last line of the file (the remainder is then empty).
  table_body <- sql_text[-seq_len(table_start[1])]
  table_end <- grep("\\) ENGINE", table_body)[1]
  if (is.na(table_end)) return(NULL)

  # seq_len() yields an empty selection when the table has no definition lines.
  column_lines <- table_body[seq_len(table_end - 1)]

  # A column definition starts with its backticked name. Lines such as
  # PRIMARY KEY (`id`) or KEY `idx` (`col`) start with a keyword instead, so
  # the identifiers they mention must not be reported as additional columns.
  column_pattern <- "^\\s*`([^`]*)`.*$"
  is_column <- grepl(column_pattern, column_lines)
  sub(column_pattern, "\\1", column_lines[is_column])
}
| 44 | + |
# Turn raw column headers into syntactic, unique column names.
#
# Headers are first made syntactic with make.names(). A trailing
# "<dots><number>" suffix (e.g. the "...3" that readr appends to duplicated
# headers) is then removed.
#
# Args:
#   col_names: Character vector of raw column names.
#
# Returns:
#   A character vector of the same length with cleaned, unique names.
clean_column_names <- function(col_names) {
  col_names <- make.names(col_names, unique = TRUE)
  col_names <- gsub("\\.+(\\d+)$", "", col_names)

  # Stripping the numeric suffix can leave an empty name and re-creates the
  # duplicates that the suffix was disambiguating, so repair both here.
  col_names[!nzchar(col_names)] <- "X"
  make.unique(col_names, sep = "_")
}
| 50 | + |
# Append the source file's base name (without extension) to every column name,
# so that columns stay attributable to their file after a merge.
#
# Args:
#   df:        Data frame whose columns are renamed.
#   file_name: Name or path of the file the data came from.
#
# Returns:
#   `df` with columns named "<cleaned name>_<file base name>".
rename_columns <- function(df, file_name) {
  tag <- tools::file_path_sans_ext(basename(file_name))
  cleaned <- clean_column_names(colnames(df))
  colnames(df) <- paste(cleaned, tag, sep = "_")
  df
}
| 56 | + |
| 57 | +# UI der Shiny-App |
# User interface: upload and merge controls in the sidebar, previews of both
# input files and the merged result in the main panel.
ui <- fluidPage(
  useShinyjs(),
  titlePanel(
    title = div(
      tags$img(src = "Logo_DIZ_DE.jpg", height = "80px", style = "margin-right: 10px;"),
      div(
        h1("CSV/SQL - Merger für File Preprocessings - 0.10.2", style = "margin-bottom: 0px;"),
        h4(
          "Ein Service des Datenintegrationszentrums (DIZ) und der AG (Bio-) Medical Data Science",
          style = "margin-top: 5px; color: gray; font-weight: normal;"
        )
      )
    ),
    # titlePanel() uses `title` as the browser window title by default; since
    # the title here is a tag tree, the plain-text title is passed explicitly
    # instead of emitting a second <title> element filled with markup.
    windowTitle = "CSV/SQL - Merger - DIZ & Biomedical Data Science"
  ),

  sidebarLayout(
    sidebarPanel(
      # Input file 1 and, if it has no header row, the SQL dump naming its columns.
      checkboxInput("has_headers1", "Datei 1 enthält Spaltennamen", value = FALSE),
      fileInput("file1", "Lade CSV- oder TXT-Datei 1 hoch", accept = c(".csv", ".txt")),
      fileInput("sql1", "Lade zugehörige SQL-Dump-Datei 1 hoch", accept = ".sql"),
      hr(),

      # Input file 2, same scheme.
      checkboxInput("has_headers2", "Datei 2 enthält Spaltennamen", value = FALSE),
      fileInput("file2", "Lade CSV- oder TXT-Datei 2 hoch", accept = c(".csv", ".txt")),
      fileInput("sql2", "Lade zugehörige SQL-Dump-Datei 2 hoch", accept = ".sql"),

      # ID-column pickers, rendered by the server once the files are loaded.
      uiOutput("select_id1"),
      uiOutput("select_id2"),
      hr(),

      selectInput(
        "join_type", "Join-Typ auswählen:",
        choices = list(
          "Inner Join" = "inner",
          "Outer Join" = "outer",
          "Left Join" = "left",
          "Right Join" = "right"
        )
      ),
      actionButton("merge", "Merge-this-IDs"),
      hr(),

      # Export of the merged result.
      textInput("filename", "Name der Datei: (optional)", value = ""),
      downloadButton("download", "Download Merged CSV"),
      downloadButton("download_excel", "Download Merged Excel"),
      hr(),

      tags$a(href = "readme.html", "Dokumentation öffnen", target = "_blank"),
      br(), br(),
      h4("Kontakt"),
      tags$p("Fragen? Schreiben Sie an: "),
      # No trailing comma after the last child: an empty trailing argument is
      # not accepted by every htmltools version.
      tags$a(href = "mailto:christian.jaeger@uk-halle.de", "christian.jaeger@uk-halle.de")
    ),
    mainPanel(
      h3("Vorschau der Dateien"),
      fluidRow(
        column(6, DTOutput("preview1")),
        column(6, DTOutput("preview2"))
      ),
      h3("Zusammengeführte Datei"),
      DTOutput("merged_table")
    )
  )
)
| 115 | + |
# Server logic: loads both uploads, offers ID-column pickers, merges the two
# tables on demand and serves the merged result as CSV or Excel download.
#
# Args:
#   input, output, session: Standard Shiny server arguments.
server <- function(input, output, session) {
  # An SQL dump is only needed when the data file has no header row.
  observe({
    toggleState("sql1", condition = !input$has_headers1)
    toggleState("sql2", condition = !input$has_headers2)
  })

  # Read uploaded data file number `idx` (1 or 2) and tag its columns with the
  # file name. Must be called from a reactive context. Returns NULL, after
  # notifying the user, when the upload cannot be used.
  load_table <- function(idx) {
    upload <- input[[paste0("file", idx)]]
    req(upload)

    if (!grepl("\\.(csv|txt)$", upload$name, ignore.case = TRUE)) {
      showNotification(
        paste0("Fehler: Ungültiges Dateiformat für Datei ", idx, "."),
        type = "error"
      )
      return(NULL)
    }

    if (input[[paste0("has_headers", idx)]]) {
      df <- read_csv(upload$datapath, show_col_types = FALSE)
    } else {
      sql_upload <- input[[paste0("sql", idx)]]
      req(sql_upload)
      col_names <- extract_column_names(sql_upload$datapath)
      # Covers both NULL and an empty vector; tell the user instead of
      # silently showing nothing.
      if (length(col_names) == 0) {
        showNotification(
          paste0("Fehler: Keine Spaltennamen in SQL-Dump-Datei ", idx, " gefunden."),
          type = "error"
        )
        return(NULL)
      }
      df <- read_csv(upload$datapath, col_names = col_names, show_col_types = FALSE)
    }

    rename_columns(df, upload$name)
  }

  data1 <- reactive(load_table(1))
  data2 <- reactive(load_table(2))

  output$preview1 <- renderDT({
    req(data1())
    datatable(data1(), options = list(scrollX = TRUE))
  })

  output$preview2 <- renderDT({
    req(data2())
    datatable(data2(), options = list(scrollX = TRUE))
  })

  output$select_id1 <- renderUI({
    req(data1())
    selectInput("id1", "Wähle ID-Spalte Datei 1:", choices = names(data1()))
  })

  output$select_id2 <- renderUI({
    req(data2())
    selectInput("id2", "Wähle ID-Spalte Datei 2:", choices = names(data2()))
  })

  # Holds the most recent merge result; NULL until a merge has succeeded.
  merged_data <- reactiveVal()

  observeEvent(input$merge, {
    req(input$id1, input$id2)
    left <- data1()
    right <- data2()
    req(left, right)
    # Guard against a selection that no longer matches the loaded files.
    req(input$id1 %in% names(left), input$id2 %in% names(right))

    # all.x / all.y together express all four join types:
    # inner = neither, left = x only, right = y only, outer = both.
    keep_left <- input$join_type %in% c("outer", "left")
    keep_right <- input$join_type %in% c("outer", "right")

    # Report a failing merge to the user rather than letting the error
    # escape the observer.
    merged <- tryCatch(
      merge(
        left, right,
        by.x = input$id1, by.y = input$id2,
        all.x = keep_left, all.y = keep_right
      ),
      error = function(e) {
        showNotification(
          paste("Fehler beim Zusammenführen:", conditionMessage(e)),
          type = "error"
        )
        NULL
      }
    )

    merged_data(merged)
  })

  output$merged_table <- renderDT({
    req(merged_data())
    datatable(merged_data(), options = list(scrollX = TRUE))
  })

  # Downloads are only meaningful once a merge result exists.
  observe({
    has_result <- !is.null(merged_data())
    toggleState("download", condition = has_result)
    toggleState("download_excel", condition = has_result)
  })

  # Build the export file name: the user-supplied name if given, otherwise
  # "<file1>_<file2>", followed by the extension `ext` (including the dot).
  export_filename <- function(ext) {
    stem <- trimws(input$filename)
    if (!nzchar(stem)) {
      stem <- paste0(
        tools::file_path_sans_ext(input$file1$name), "_",
        tools::file_path_sans_ext(input$file2$name)
      )
    }
    paste0(stem, ext)
  }

  output$download <- downloadHandler(
    filename = function() export_filename(".csv"),
    content = function(file) {
      req(merged_data())
      write_csv(merged_data(), file)
    }
  )

  output$download_excel <- downloadHandler(
    filename = function() export_filename(".xlsx"),
    content = function(file) {
      req(merged_data())
      write_xlsx(merged_data(), file)
    }
  )
}
| 227 | + |
# Assemble the UI and server definitions into the runnable Shiny app object.
shinyApp(ui = ui, server = server)
0 commit comments