#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 600
library(shiny)
library(tidyverse)
library(cowplot)
library(shinyvalidate)
# Define UI for application with tabs
ui <- fluidPage(
# Application title
titlePanel("Regression: Confidence and Prediction Intervals"),
tabsetPanel(
# ---- FIRST TAB ----
tabPanel("Confidence Interval for Regression Line",
sidebarLayout(
sidebarPanel(
sliderInput("true_intercept", "True Intercept", min = -10, max = 10, value = 2, step = 0.1),
sliderInput("true_slope", "True Slope", min = -2, max = 2, value = 0.5, step = 0.01),
numericInput("true_sigma", "True Error SD", value = 2, min = 0.1, max = 10, step = 0.1),
numericInput("alpha", "Significance Level", value = 0.05, min = 0, max = 1),
numericInput("n", "Sample size", min = 5, max = 1000, value = 30),
actionButton("sampleBtn", "Draw Sample"),
actionButton("resetBtn", "Reset")
),
mainPanel(
tabsetPanel(
tabPanel("Single Sample", plotOutput("samplePlot")),
tabPanel("All Samples", plotOutput("allSamplesPlot"))
)
)
)
),
# ---- SECOND TAB (unchanged) ----
tabPanel("Confidence vs Prediction Intervals",
sidebarLayout(
sidebarPanel(
sliderInput("xval", "Choose X value", value = 25, min = 18, max = 40, step = 1),
radioButtons("interval_type", "Confidence vs Prediction Interval",
c("Confidence Interval" = "ci", "Prediction Interval" = "pi"),
selected = "ci")
),
mainPanel(
tabsetPanel(
tabPanel("Single Sample", plotOutput("intervalPlot")),
tabPanel("Multiple Samples", plotOutput("intervalWithLinesPlot"))
)
)
)
)
)
)
# ---- SERVER ----
server <- function(input, output, session) {
# Reactive values to store all fitted regression lines
simdata <- reactiveValues(
lines = tibble(intercept = numeric(), slope = numeric())
)
# Generate a new sample when "Sample" is clicked
new_sample <- eventReactive(input$sampleBtn, {
x <- runif(input$n, 18, 40)
y <- input$true_intercept + input$true_slope * x + rnorm(input$n, sd = input$true_sigma)
tibble(x, y)
})
# Store sample data as well for visualization
observeEvent(input$sampleBtn, {
dat <- new_sample()
fit <- lm(y ~ x, data = dat)
coefs <- coef(fit)
# Store estimated line
simdata$lines <- bind_rows(simdata$lines,
tibble(intercept = coefs[1], slope = coefs[2]))
# Store sample data
simdata$samples <- append(simdata$samples, list(dat))
})
# Reset all stored lines and data
observeEvent(input$resetBtn, {
simdata$lines <- tibble(intercept = numeric(), slope = numeric())
simdata$samples <- list()
})
# ---- Plot: current sample ----
output$samplePlot <- renderPlot({
req(new_sample())
dat <- new_sample()
mdl <- lm(y ~ x, data = dat)
newx <- tibble(x = seq(min(dat$x), max(dat$x), length.out = 100))
preds <- predict(mdl, newdata = newx, interval = "confidence", level = 1 - input$alpha)
newx <- bind_cols(newx, as.data.frame(preds))
ggplot(dat, aes(x = x, y = y)) +
geom_point(size = 2, alpha = 0.6) +
geom_abline(intercept = input$true_intercept, slope = input$true_slope,
color = "blue", linetype = "dashed", size = 1.2) +
geom_line(data = newx, aes(y = fit), color = "red", size = 1) +
geom_ribbon(data = newx, aes(x = x, ymin = lwr, ymax = upr),
inherit.aes = FALSE, alpha = 0.2, fill = "red") +
labs(title = "Current Sample",
x = "X", y = "Y") +
theme_bw(base_size = 14)
})
# ---- Plot: all regression lines ----
output$allSamplesPlot <- renderPlot({
df <- as.data.frame(simdata$lines)
x_range <- c(15, 43)
# Compute y-range for the true line + estimated lines
true_y <- input$true_intercept + input$true_slope * x_range
if (nrow(df) > 0) {
# Get y-values of each estimated line at x_range
y_vals <- t(apply(df, 1, function(r) {
as.numeric(r["intercept"]) + as.numeric(r["slope"]) * x_range
}))
all_y_min <- min(y_vals, true_y)
all_y_max <- max(y_vals, true_y)
} else {
all_y_min <- min(true_y)
all_y_max <- max(true_y)
}
ymarg <- 0.1 * (all_y_max - all_y_min + 1e-6)
y_limits <- c(all_y_min - ymarg, all_y_max + ymarg)
# Base data frame to define the plotting area
base_df <- data.frame(x = x_range, y = y_limits)
ggplot(base_df, aes(x, y)) +
geom_blank() +
coord_cartesian(xlim = x_range, ylim = y_limits) +
# Estimated regression lines (red)
geom_abline(data = df, aes(intercept = intercept, slope = slope),
color = "red", alpha = 0.4, size = 1) +
# True regression line (blue)
geom_abline(intercept = input$true_intercept, slope = input$true_slope,
color = "blue", linetype = "dashed", size = 1.2) +
labs(title = paste("All Estimated Regression Lines (", nrow(df), " samples )"),
x = "X", y = "Y") +
theme_bw()
})
# ---- Second tab (unchanged) ----
vals <- reactive({
x <- runif(input$n, min = 18, max = 40)
y <- input$true_intercept + input$true_slope * x + rnorm(input$n, sd = input$true_sigma)
data.frame(x = x, y = y)
})
output$intervalPlot <- renderPlot({
req(new_sample())
vals <- new_sample()
mdl <- lm(y ~ x, data = vals)
pred <- predict(mdl, newdata = data.frame(x = input$xval),
interval = "predict", level = 1 - input$alpha)
conf <- predict(mdl, newdata = data.frame(x = input$xval),
interval = "confidence", level = 1 - input$alpha)
mean_y <- pred[1]
if (input$interval_type == "ci") {
sd_y <- (conf[3] - conf[2]) /(2 * qt(1 - input$alpha / 2, df = input$n - 2))
clr <- "purple3"
ymn <- conf[2]
ymx <- conf[3]
} else {
sd_y <- (pred[3] - pred[2]) / (2 * qt(1 - input$alpha / 2, df = input$n - 2))
clr <- "magenta"
ymn <- pred[2]
ymx <- pred[3]
}
p <- ggplot(vals, aes(x = x, y = y)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, level = 1 - input$alpha, color = "black") +
geom_errorbar(aes(x = input$xval, ymin = ymn, ymax = ymx), color = clr, width = 0.2) +
geom_point(aes(x = input$xval, y = pred[1]), color = "purple", size = 3) +
labs(title = "Regression Line and Confidence/Prediction Intervals",
x = "X", y = "Y") +
theme_bw() +
geom_vline(aes(xintercept = input$xval), color = "purple",
linewidth = 0.5, linetype = 2) +
geom_abline(intercept = input$true_intercept, slope = input$true_slope,
color = "blue", linetype = 2)
y_density <- axis_canvas(p, axis = "y", coord_flip = TRUE) +
geom_function(fun = dnorm, args = list(mean = pred[1], sd = sd_y), color = clr) +
coord_flip()
combined_plot <- insert_yaxis_grob(p, y_density, position = "right")
ggdraw(combined_plot)
})
output$intervalWithLinesPlot <- renderPlot({
req(length(simdata$samples) > 0)
# Use the last sample to get CI/PI
last_sample <- tail(simdata$samples, 1)[[1]]
mdl <- lm(y ~ x, data = last_sample)
if (input$interval_type == "ci") {
preds <- predict(mdl, newdata = data.frame(x = input$xval),
interval = "confidence", level = 1 - input$alpha)
} else {
preds <- predict(mdl, newdata = data.frame(x = input$xval),
interval = "predict", level = 1 - input$alpha)
}
# x grid for plotting lines
x_grid <- seq(18, 40, length.out = 100)
# build data frame for all sample estimated lines
lines_df <- simdata$lines %>%
mutate(id = row_number()) %>%
group_by(id) %>%
summarise(x = list(x_grid),
y = list(intercept + slope * x_grid)) %>%
unnest(c(x, y))
ggplot() +
# All sample lines
geom_line(data = lines_df, aes(x = x, y = y, group = id),
color = "red", alpha = 0.3) +
# All sample points
geom_point(data = bind_rows(simdata$samples, .id = "sample_id"),
aes(x = x, y = y), alpha = 0.4, size = 1.5) +
# True regression line
geom_abline(intercept = input$true_intercept, slope = input$true_slope,
color = "blue", linetype = "dashed", size = 1.2) +
# CI/PI bar at xval
geom_errorbar(aes(x = input$xval, ymin = preds[2], ymax = preds[3]),
color = "purple", width = 0.2) +
labs(title = "Sample Regression Lines",
x = "X", y = "Y") +
theme_bw(base_size = 14)
})
}
# ---- RUN APP ----
shinyApp(ui = ui, server = server)