RScript RStudio for Health Research

14 Oktober 2023

RScript RStudio for Health Research

Sudah ikut Risetku Academy: RStudio for Health Research? Berikut rangkuman code R nya, silakan copy-paste di RScript RStudio Anda!

  • Munculkan panel RScript dengan cara:
    • Windows: CTRL + SHIFT + N
    • Mac: Command ⌘ + Shift + N
  • Eksekusi code per-baris dengan cara
    • Windows: CTRL + Enter
    • Mac: Command ⌘ + Return
# Basic Math
1 + 2
10/2

# Literal
TRUE

# Assign variable
object <- "object"
object

#Install Packages
install.packages("tidyverse")
install.packages("medicaldata")
library(tidyverse)
library(medicaldata)

#Load Data
medicaldata::covid_testing

library(readxl)
df <- read_excel("Downloads/risetku_rstudio/covid.xlsx")
# Path ke file excel, atau import dengan fitur `Files`
View(df)

help(covid_testing)

#View Data
head(df)
glimpse(df)

class(df$age)

#Data Cleaning
  #Select Data
  select(df, 
         -age,
         -gender,
         -rec_ver_tat,
         -pan_day)

  filter(df, gender == "female")
  
  df %>% 
    select(age,
           gender,
           rec_ver_tat,
           pan_day) %>% 
    filter(df, gender == "female")
  
  #Remove Missing Data
    df_cleaned <- na.omit(df)
    view(df_cleaned)
  
  #Reclassification
    df_cleaned <- df_cleaned %>%
      mutate(waktu_pandemi = case_when(
        pan_day < 34 ~ 1,
        pan_day > 33 & pan_day< 67 ~ 2,
        pan_day > 66 ~ 3,
      ))
    
    view(df_cleaned)
    
    #Labeling Kelompok
    df_cleaned$waktu_pandemi <- 
      factor(df_cleaned$waktu_pandemi, 
             levels=c(1,2,3),
             labels=c("1st 33-days", # Reference
                      "2nd 33-days", 
                      "3rd 33-days"))
    
    df_cleaned$drive_thru_ind <- 
      factor(df_cleaned$drive_thru_ind, 
             levels=c(1,0),
             labels=c("Yes", 
                      "No"))
    
    view(df_cleaned)
    glimpse(df_cleaned)
    
  #Remove Outliers Collect to Receive
    summary(df_cleaned$col_rec_tat)
    
    length(df_cleaned$col_rec_tat)
    #8303
    
    df_cleaned$zscore_col_rec_tat <- (abs(df_cleaned$col_rec_tat-
                                mean(df_cleaned$col_rec_tat))/
                            sd(df_cleaned$col_rec_tat))
    
    view(df_cleaned)

    df_cleaned <- 
      subset(df_cleaned, 
             df_cleaned$zscore_col_rec_tat < 3)
    
    length(df_cleaned$col_rec_tat)
    #8302
    
    #Remove Outliers Receive to Verification
    summary(df_cleaned$rec_ver_tat)
    
    length(df_cleaned$rec_ver_tat)
    #8302
    
    df_cleaned$zscore_rec_ver_tat <- (abs(df_cleaned$rec_ver_tat-
                                          mean(df_cleaned$rec_ver_tat))/
                                      sd(df_cleaned$rec_ver_tat))
    
    
    df_cleaned <- subset(df_cleaned, df_cleaned$zscore_rec_ver_tat < 3)
    
    length(df_cleaned$rec_ver_tat)
    #8237

#Exploratory
library(ggpubr)
    
    #Collect to Receive based on Days
        #Buat Frekuensi Per Hari  
        df_freq_col_rec_tat <- df_cleaned %>%
          group_by(pan_day) %>%
          summarize(Freq = n()) 
        
        view(df_mean_col_rec_tat)
    
        #Buat Mean Per Hari
       df_mean_col_rec_tat <- df_cleaned %>%
          group_by(pan_day) %>%
          summarise_at(vars(col_rec_tat), list(name = mean))
       
       view(df_mean_col_rec_tat)
        #Gabungin Data Frame
        df_col_rec_tat_viz <- cbind(df_freq_col_rec_tat,df_mean_col_rec_tat)
        df_col_rec_tat_viz
        
        df_mean_col_rec_tat <- df_mean_col_rec_tat %>% 
          select(name) %>% 
          rename(mean = name)
        
        #Visual Collect to Receive
        ggplot(df_col_rec_tat_viz, aes(x = pan_day, 
                                     y = mean)) +
          geom_point(alpha=0.6, aes(size = Freq,
                                    color = Freq)) +
          labs(color = "Number of Patients", y = "Mean (hours)", x = "-Day Pandemic") +
          theme_bw() 

    #Receive to Verification based on Days
        #Buat Frekuensi Per Hari  
        df_freq_rec_ver_tat <- df %>%
          group_by(pan_day) %>%
          summarize(Freq = n()) 
        
        #Buat Mean Per Hari
        df_mean_rec_ver_tat <- df %>%
          group_by(pan_day) %>%
          summarise_at(vars(rec_ver_tat), list(name = mean))
        
        df_mean_rec_ver_tat <- df_mean_rec_ver_tat %>% 
          select(name) %>% 
          rename(mean = name)
        
        view(df_mean_rec_ver_tat)
        
        #Gabungin Data Frame
        df_rec_ver_tat_viz <- cbind(df_freq_rec_ver_tat,df_mean_rec_ver_tat)
        df_rec_ver_tat_viz
        
        #Visual Collect to Receive
        ggplot(df_rec_ver_tat_viz, aes(x = pan_day, 
                                     y = mean)) +
          geom_point(alpha=0.6, aes(size = Freq,
                                   color = Freq)) +
          labs(fill = "Number of Patients", y = "Mean (hours)", x = "-Day Pandemic") +
          theme_bw() 
        
  #Data Berdasarkan Kelompok  
       #Visual Dasar Colect to Receive
       ggbarplot(df_cleaned, x = "waktu_pandemi",
                  y = "col_rec_tat", 
                  add = "mean_se",
                  color = "waktu_pandemi", palette = "jco") +
          stat_compare_means(label.y = 3.5)
    
       #Visual Receive to Verification + Signifikansi
          ggbarplot(df_cleaned, x = "waktu_pandemi", y = "rec_ver_tat", add = "mean_se",
                    color = "waktu_pandemi", palette = "jco") + 
         stat_compare_means(method = "anova",label.y = 6)
    
#Descriptive
install.package("table1")
library(table1)
   
    #Tabel Deskriptif
    tabel_deskriptif <- table1(~ age + 
                                 gender + 
                                 result +
                                 payor_group + 
                                 demo_group + 
                                 drive_thru_ind +
                                 col_rec_tat +
                                 rec_ver_tat | waktu_pandemi,
            data = df_cleaned, 
            topclass="Rtable1-zebra")

    tabel_deskriptif

    #Ganti Label di Tabel Deksriptif
    label(df_cleaned$gender)       <- "Sex"
    label(df_cleaned$age)       <- "Age"
    label(df_cleaned$result)     <- "Test Result"
    label(df_cleaned$payor_group) <- "Payor"
    label(df_cleaned$demo_group) <- "Demographic Group"
    label(df_cleaned$drive_thru_ind) <- "Drive Thru"
    label(df_cleaned$col_rec_tat) <- "Colect to Receive Time"
    label(df_cleaned$rec_ver_tat) <- "Receive to Verification Time"
    
    units(df_cleaned$age)               <- "years"
    units(df_cleaned$rec_ver_tat)       <- "hours"
    units(df_cleaned$col_rec_tat)       <- "hours"
  


#Statistic
library(rstatix)
    #Normality test
    normality <- df_cleaned %>% 
      group_by(waktu_pandemi) %>% 
     shapiro_test(col_rec_tat) %>% 
      add_significance()
    
    normality
    
    #Kruskall-Walis
    kruskal.test(col_rec_tat ~ waktu_pandemi, data = df_cleaned)
    
    #Cross-Tabulation
    library(sjPlot)
    sjPlot::tab_xtab(var.row = df_cleaned$drive_thru_ind, 
                     var.col = df_cleaned$result, 
                     title = "Table Title", show.row.prc = TRUE)