Sampling Activity

In-class activity to simulate a sampling distribution
Sampling Distributions
Author

Haley Grant

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1000

# ui.R
library(shiny)
library(plotly)
library(tidyverse)

ui <- fluidPage(
  titlePanel("Sampling Distribution Simulation"),
  sidebarLayout(
    sidebarPanel(
      fileInput("file1", "Choose CSV File", accept = ".csv"),
      uiOutput("columnSelectUI"),  # Dropdown for selecting the column
      numericInput("sampleSize", "Sample size:", min = 1, value = 10),
      numericInput("numSamples", "Number of resamples:", min = 1, value = 1000),
      actionButton("update", "Draw Samples")
      
    ),
    mainPanel(
      fluidRow(
        conditionalPanel("output.showHeader==true" ,
                         h3("Original Data Distriubtion", align = "center")),
        splitLayout(cellWidths = c("50%", "50%"), 
                    plotlyOutput("dataPlot"),  # Interactive scatter plot for data points
                    plotOutput("dataHist"))    # Histogram for data distribution
      ),
      actionButton("removePoint", "Remove Selected Point"),
      conditionalPanel(
        condition = "output.showSimPanel == true", 
        h3("Simulated Samples",align = "center"),
        numericInput("index", "Index of resample to plot", value = 1, min = 1, max = 1000)
      ,
      fluidRow(
        splitLayout(cellWidths = c("50%", "50%"), cellArgs = list(align = "center", style = "vertical-align: middle"),
                    plotOutput("samplePlot", width = "80%", height = "325px"),  # Plot of specific sample 
                    plotOutput("distPlot"))   # Plot for sampling distribution of sample means
      ),
      tableOutput("stats"))
    )
  )
)

server <- function(input, output, session) {
  data <- reactiveVal(data.frame())
  counter <- reactiveVal(0)
  showSimPanel <- reactiveVal(FALSE)
  showHeader <- reactiveVal(FALSE)
  
  observeEvent(input$file1, {
    req(input$file1)
    df <- read.csv(input$file1$datapath)
    data(df)
    
    counter(0)
    showSimPanel(FALSE)  # Hide the Simulated Samples section
    showHeader(TRUE)
    # Update the dropdown with column names after data is loaded
    updateSelectInput(session, "colName", choices = colnames(df))
     updateNumericInput(session, "index", value = 1)
     
    # Clear any previous plots or outputs
  output$samplePlot <- renderPlot({ NULL })
  output$distPlot <- renderPlot({ NULL })
  output$sampleMeans <- renderTable({ NULL })
  output$stats <- renderTable({ NULL })
  })
 output$showHeader <- reactive({
    showHeader()
  })
  
  outputOptions(output, "showHeader", suspendWhenHidden = FALSE) 
  
  # UI for selecting the column
  output$columnSelectUI <- renderUI({
    req(ncol(data()) > 0)  # Ensure there are columns in the data
    selectInput("colName", "Select Column:", choices = colnames(data()), selected = colnames(data())[1])
  })
  
  selected_point <- reactiveVal(NULL)  # Initialize as NULL
  
  observe({
    df <- data()
    req(input$colName %in% colnames(df))  # Ensure the column name exists
    
    output$dataPlot <- renderPlotly({
      req(nrow(df) > 0)  # Ensure there is data to plot
      plot_ly(df %>% mutate(index = row_number()) %>% highlight_key(~index), x = seq_len(nrow(df)),
              y = ~df[[input$colName]], type = 'scatter', mode = 'markers',
              marker = list(color = '#1f77b4', size = 8),
              text = ~paste("Index:", seq_len(nrow(df)), "<br>Value:", df[[input$colName]]),
              hoverinfo = "text") %>%
        layout(title = paste0("Data Points: ", input$colName),
               xaxis = list(title = "Index"),
               yaxis = list(title = "Value")) %>%
        highlight(on = "plotly_click", off = "plotly_doubleclick")
    })
    
    output$dataHist <- renderPlot({
      req(nrow(df) > 0)  # Ensure there is data to plot
      hist(df[[input$colName]], main = paste0("Population Distribution\n n = ", nrow(df)), xlab = "Values", col = "lightgreen", border = "white", breaks = 15)
    })
  })
  
  observeEvent(event_data("plotly_click"), {
    click_data <- event_data("plotly_click")
    if (!is.null(click_data) && !is.null(click_data$pointNumber)) {
      selected_point(click_data$pointNumber + 1)  # +1 to match R's 1-based indexing
    }
  })
  
  observeEvent(input$removePoint, {
    selected <- selected_point()
    req(!is.null(selected))  # Ensure a point is selected
    df <- data()
    if (nrow(df) > 0 && selected <= nrow(df)) {
      df <- df[-selected, , drop = FALSE]
      data(df)
      selected_point(NULL)  # Clear the selection after removal
    }
  })
  
  observeEvent(input$update,{
    counter(counter()+1)
    showSimPanel(TRUE) 
  })
   

  observeEvent({input$numSamples 
    input$removePoint 
    input$sampleSize 
    input$colName},{
  counter(0) 
    showSimPanel(FALSE) 
    updateNumericInput(session, "index", value = 1)
    })



output$showSimPanel <- reactive({
    showSimPanel()
  })
  
  outputOptions(output, "showSimPanel", suspendWhenHidden = FALSE) 
  observeEvent(counter(), {
    if(counter()>0){
    req(data())
    req(nrow(data()) > 0)  # Check if there are rows in the data
    req(input$sampleSize, input$numSamples, input$colName)
    df <- data()
    
    if (ncol(df) > 0) {
      sample_means <- numeric(input$numSamples)
      samples <- vector(length = input$numSamples, mode = "list")
      for (i in 1:input$numSamples) {
        sample_data <- df[sample(nrow(df), input$sampleSize, replace = TRUE), input$colName] %>% as.data.frame()
        samples[i] <- sample_data 
        sample_means[i] <- mean(sample_data[,1],na.rm=T)
      }
      
      output$samplePlot <- renderPlot({
        samples[[input$index]] %>%
          data.frame() %>%
          ggplot(aes(x = .)) +
          geom_dotplot(fill = "indianred1", color = "white") +
          theme_classic() +
          labs(x = "Values", title = paste0("Re-sample index: ", input$index)) +
          xlim(c(min(data()[[input$colName]]), max(data()[[input$colName]]))) +
          theme(axis.text.y = element_blank(),
                axis.title.y = element_blank(),
                axis.ticks = element_blank()) +
          geom_vline(xintercept = mean(samples[[input$index]]),
                     alpha = 0.5, color = "indianred1")
      })
      
      output$distPlot <- renderPlot({
        req(input$sampleSize > 0)
        hist(sample_means,
             breaks = seq(from = min(data()[[input$colName]]), to = max(data()[[input$colName]]),
                          length.out = round(15 * log(input$sampleSize))),
             main = "Sampling Distribution of Sample Means",
             xlab = "Sample Means", col = "lightblue", border = "white",
             xlim = c(min(data()[[input$colName]]), max(data()[[input$colName]])))
      })
      
      output$sampleMeans <- renderTable({
        data.frame(SampleMeans = sample_means)
      })
      
      output$stats <- renderTable({
        dt = data.frame(Mean = c(mean(data()[[input$colName]]), mean(sample_means)),
                        SD = c(sd(data()[[input$colName]]), sd(sample_means)))
        rownames(dt) <- c("Population", "Sample Means")
        dt
      }, bordered = T, rownames = T, align = "c")
    } }
  })
}

shinyApp(ui = ui, server = server)