#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 600
#|
library(shiny)
library(ggplot2)
library(kableExtra)
ui <- fluidPage(
titlePanel("ANOVA Simulation"),
sidebarLayout(
sidebarPanel(
numericInput("n_groups", "Number of Groups", value = 4, min = 2),
uiOutput("group_mean_sliders"),
numericInput("n_per_group", "Sample Size per Group", value = 30, min = 5),
numericInput("within_var", "Within Group Variability (SD)", value = 5, min = 1),
numericInput("alpha", "Significance Level (α):", value = 0.05, min = 0.001, max = 1, step = 0.01),
actionButton("draw_sample", "Draw Sample"),
actionButton("draw_sample_100", "Draw 100 Samples"),
radioButtons("plot_type", "Select Plot Type:",
choices = c("Density Plots" = "density", "Boxplots" = "boxplot"),
selected = "boxplot"),
actionButton("reset", "Reset")
),
mainPanel(
plotOutput("combined_plot"),
h4("ANOVA Table Summary"),
tableOutput("anova_table"),
textOutput("anova_result"),
plotOutput("f_dist_plot"),
h4("Observed F-statistics Over Time"),
plotOutput("f_history_plot")
)
)
)
server <- function(input, output, session) {
# Reactive value to store F-statistics history
f_history <- reactiveVal(data.frame(draw = numeric(), f_stat = numeric()))
# Mutable reactive to store current sample (NULL when none)
sample_data <- reactiveVal(NULL)
# Population data sampling large size (e.g., 10,000 per group)
pop_data <- reactive({
means <- sapply(1:input$n_groups, function(i) input[[paste0("mean_", i)]])
group <- rep(1:input$n_groups, each = 10000)
values <- unlist(lapply(1:input$n_groups, function(i) {
rnorm(10000, mean = means[i], sd = input$within_var)
}))
data.frame(group = as.factor(group), values = values)
})
# When user clicks "Draw Sample" produce a new sample and store it
observeEvent(input$draw_sample, {
isolate({
means <- sapply(1:input$n_groups, function(i) input[[paste0("mean_", i)]])
group <- rep(1:input$n_groups, each = input$n_per_group)
values <- unlist(lapply(1:input$n_groups, function(i) {
rnorm(input$n_per_group, mean = means[i], sd = input$within_var)
}))
samp <- data.frame(group = as.factor(group), values = values)
sample_data(samp)
# compute F and append to history
fit <- aov(values ~ group, data = samp)
f_stat <- summary(fit)[[1]]$`F value`[1]
current <- f_history()
new_row <- data.frame(draw = ifelse(nrow(current)==0, 1, max(current$draw)+1),
f_stat = f_stat)
f_history(rbind(current, new_row))
})
})
# Draw 100 samples: append 100 F-stats to history and set sample_data to the last one
observeEvent(input$draw_sample_100, {
isolate({
means <- sapply(1:input$n_groups, function(i) input[[paste0("mean_", i)]])
new_f_stats <- replicate(100, {
group <- rep(1:input$n_groups, each = input$n_per_group)
values <- unlist(lapply(1:input$n_groups, function(i) {
rnorm(input$n_per_group, mean = means[i], sd = input$within_var)
}))
data <- data.frame(group = as.factor(group), values = values)
fit <- aov(values ~ group, data = data)
summary(fit)[[1]]$`F value`[1]
})
# Append to history
current <- f_history()
n_existing <- ifelse(nrow(current) == 0, 0, max(current$draw))
new_rows <- data.frame(
draw = seq(n_existing + 1, n_existing + 100),
f_stat = new_f_stats
)
f_history(rbind(current, new_rows))
# Optionally set sample_data to the last simulated sample (so UI shows a sample)
# If you prefer NOT to set sample_data here, comment the next block out.
last_group <- rep(1:input$n_groups, each = input$n_per_group)
last_values <- unlist(lapply(1:input$n_groups, function(i) {
rnorm(input$n_per_group, mean = means[i], sd = input$within_var)
}))
sample_data(data.frame(group = as.factor(last_group), values = last_values))
})
})
# ANOVA summary table (Between/Within) — only render when we have a sample
output$anova_table <- renderUI({
req(!is.null(sample_data()))
req(nrow(sample_data()) > 0)
fit <- aov(values ~ group, data = sample_data())
anova_res <- summary(fit)[[1]]
ss_between <- anova_res$`Sum Sq`[1]
ss_within <- anova_res$`Sum Sq`[2]
df_between <- anova_res$Df[1]
df_within <- anova_res$Df[2]
ms_between <- ss_between / df_between
ms_within <- ss_within / df_within
table_df <- data.frame(
Source = c("Between Groups", "Within Groups"),
`Sum of Squares (SS)` = round(c(ss_between, ss_within), 3),
`Degrees of Freedom (df)` = c(df_between, df_within),
`Mean Square (MS)` = round(c(ms_between, ms_within), 3)
)
table_html <- table_df %>%
knitr::kable("html", align = "lccc",
caption = "ANOVA Summary Table",
col.names = c("Source", "Sum of Squares", "Degrees of Freedom","Variance Estimate")) %>%
kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))
HTML(table_html)
})
# Reset button clears sample and history (no assignment to eventReactive)
observeEvent(input$reset, {
f_history(data.frame(draw = numeric(), f_stat = numeric()))
sample_data(NULL)
# Optionally also reset inputs (uncomment if you want)
# updateNumericInput(session, "n_groups", value = 4)
# updateNumericInput(session, "n_per_group", value = 30)
# updateNumericInput(session, "within_var", value = 5)
# updateNumericInput(session, "alpha", value = 0.05)
# Reset sliders for means to 2 (if you prefer)
# lapply(1:input$n_groups, function(i) {
# updateSliderInput(session, paste0("mean_", i), value = 2)
# })
})
# Plot of F-statistics over time
output$f_history_plot <- renderPlot({
req(nrow(f_history()) > 0)
mx = max(qf(0.999, df1 = input$n_groups - 1, df2 = input$n_groups*(input$n_per_group - 1)),
max(f_history()$f_stat))
df1 <- input$n_groups - 1
df2 <- input$n_groups * (input$n_per_group - 1)
x <- seq(0, max(max(f_history()$f_stat), 10), length.out = 500)
f_density <- df(x, df1, df2)
f_df <- data.frame(x = x, density = f_density)
crit_f <- qf(1 - input$alpha, df1, df2)
ggplot(f_history(), aes(x = f_stat)) +
geom_histogram(aes(y = ..density..),
bins = 30,
color = "black",
fill = "lavender",
alpha = 0.7) +
geom_vline(xintercept = qf(1-input$alpha, df1 = input$n_groups - 1, df2 = input$n_groups*(input$n_per_group - 1) ),
color = "firebrick4")+
geom_area(data = f_df[f_df$x >= crit_f, ], aes(x = x, y = density),
fill = "firebrick4", alpha = 0.4) +
geom_line(data = f_df,
aes(x = x, y = density),
color = "red",
size = 1.2) +
theme_minimal() +
labs(
title = "Observed F-statistics vs Theoretical F-Distribution",
x = "F value",
y = "Density"
)
})
# Dynamically generate sliders for group means (unchanged)
output$group_mean_sliders <- renderUI({
lapply(1:input$n_groups, function(i) {
sliderInput(paste0("mean_", i), paste0("Mean for Group ", i),
min = -10, max = 10, value = 2, step = 0.1)
})
})
# Combined plot for population and sample distributions
output$combined_plot <- renderPlot({
if (input$plot_type == "density") {
p <- ggplot(pop_data(), aes(x = values, fill = group)) +
geom_density(alpha = 0.4) +
facet_wrap(~ group, ncol = 1) +
labs(title = "Population Distributions", y = "Density", x = "Values") +
theme_minimal() +
theme(legend.position = "none")
} else {
p <- ggplot(pop_data(), aes(y = values, x = group, color = group)) +
geom_boxplot(alpha = 0.4) +
labs(title = "Population Distributions", y = "Values", x = "Group") +
theme_minimal() +
theme(legend.position = "none")
}
# Only add sample layers if we have a sample
if (!is.null(sample_data())) {
samp <- sample_data()
if (nrow(samp) > 0) {
if (input$plot_type == "density") {
p <- p +
geom_histogram(data = samp, aes(x = values, fill = group, y = ..density..),
color = "black", alpha = 0.6, position = "identity", bins = 30) +
geom_vline(aes(xintercept = mean(values)), data = samp,
linetype = "dashed", color = "blue", size = 1) +
labs(subtitle = "+ Sample Distributions")
} else {
p <- p +
geom_boxplot(data = samp, aes(x = group, y = values, fill = group), color = "black",
alpha = 0.7, position = position_dodge(width = 0.8))+
geom_hline(aes(yintercept = mean(values)), data = samp,
linetype = "dashed", color = "blue", size = 1) +
labs(subtitle = "+ Sample Distributions")
}
}
}
print(p)
})
# Perform ANOVA and display F statistic — only when sample exists
output$anova_result <- renderText({
req(!is.null(sample_data()))
req(nrow(sample_data()) > 0)
fit <- aov(values ~ group, data = sample_data())
anova_res <- summary(fit)
f_stat <- anova_res[[1]]$`F value`[1]
p_value <- anova_res[[1]]$`Pr(>F)`[1]
paste("ANOVA F-statistic:", round(f_stat, 3), "with p-value:", round(p_value, 3))
})
# Plot F distribution with shaded p-value — only when sample exists
output$f_dist_plot <- renderPlot({
req(!is.null(sample_data()))
req(nrow(sample_data()) > 0)
fit <- aov(values ~ group, data = sample_data())
f_stat <- summary(fit)[[1]]$`F value`[1]
p_value <- summary(fit)[[1]]$`Pr(>F)`[1]
df1 <- input$n_groups - 1
df2 <- input$n_groups * (input$n_per_group - 1)
x <- seq(0, max(10, f_stat + 5), length.out = 500)
f_dist <- df(x, df1, df2)
f_df <- data.frame(x = x, y = f_dist)
crit_f <- qf(1 - input$alpha, df1, df2)
ggplot(f_df, aes(x = x, y = y)) +
geom_line(size = 1) +
geom_area(data = f_df[f_df$x >= crit_f, ], aes(x = x, y = y),
fill = "firebrick4", alpha = 0.4) +
geom_area(data = f_df[f_df$x >= f_stat, ], aes(x = x, y = y),
fill = "purple", alpha = 0.4) +
geom_vline(xintercept = crit_f, linetype = "dashed", color = "firebrick4", size = 1) +
geom_vline(xintercept = f_stat, linetype = "dashed", color = "forestgreen", size = 1) +
annotate("text", x = f_stat + 0.5, y = max(f_dist)/2,
label = paste0("F = ", round(f_stat, 3)),
color = "forestgreen") +
labs(title = "F Distribution with Rejection Region & p-value",
x = "F value",
y = "Density") +
theme_bw()
})
}
shinyApp(ui = ui, server = server)