Simulating Data for Power • scienceverse

suppressPackageStartupMessages({
  library(scienceverse)
  library(faux)
  library(tidyr)
  library(dplyr)
  library(ggplot2)
})

First, I use the sim_design() function from faux to visualise the design for my proposed study. I’m simulating 30 dog owners with an average happiness score of 10 and an average smugness score of 5, and 30 cat owners with an average happiness score of 9 and an average smugness score of 6. Since all scores have an SD of 2, each group difference of 1 point corresponds to an effect size of d = 0.5.

# Simulate a 2 (pet: between-subject) x 2 (emotion: within-subject) design.
# `mu` lists the cell means as dog-happy, dog-smug, cat-happy, cat-smug.
simdat <- sim_design(
  within = list(emotion = c("happy", "smug")),
  between = list(pet = c("dog", "cat")),
  mu = c(10, 5, 9, 6),
  sd = 2, # the same SD is used for every cell
  n = 30 # in each between-subject cell
)

I can then use this simulated data to plan my analyses.

I’ll use a one-sided, two-sample t-test against a mu of 0 to test if my hypothesis that dog owners are happier than cat owners is corroborated.

# Corroboration test for happiness: one-sided Welch t-test of dog > cat
# against a null difference of 0.
cat_happy <- with(simdat, happy[pet == "cat"])
dog_happy <- with(simdat, happy[pet == "dog"])
t.test(x = dog_happy, y = cat_happy, alternative = "greater", mu = 0)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  dog_happy and cat_happy
#> t = 2.4711, df = 53.854, p-value = 0.008333
#> alternative hypothesis: true difference in means is greater than 0
#> 95 percent confidence interval:
#>  0.4669045       Inf
#> sample estimates:
#> mean of x mean of y 
#> 10.444280  8.997448

I’ll use a one-sided, two-sample t-test against a mu of 0.2 (my smallest effect size of interest; SESOI) to assess falsification of my hypothesis that dog owners are happier than cat owners.

# Falsification test for happiness: is the dog - cat difference significantly
# smaller than the SESOI of 0.2?
cat_happy <- with(simdat, happy[pet == "cat"])
dog_happy <- with(simdat, happy[pet == "dog"])
t.test(x = dog_happy, y = cat_happy, alternative = "less", mu = 0.2)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  dog_happy and cat_happy
#> t = 2.1295, df = 53.854, p-value = 0.9811
#> alternative hypothesis: true difference in means is less than 0.2
#> 95 percent confidence interval:
#>     -Inf 2.42676
#> sample estimates:
#> mean of x mean of y 
#> 10.444280  8.997448

I’ll use a one-sided, two-sample t-test against a mu of 0 to test if my hypothesis that cat owners are smugger than dog owners is corroborated.

# Corroboration test for smugness: one-sided Welch t-test of cat > dog
# against a null difference of 0 (note that cat owners are the first sample).
cat_smug <- with(simdat, smug[pet == "cat"])
dog_smug <- with(simdat, smug[pet == "dog"])
t.test(x = cat_smug, y = dog_smug, alternative = "greater", mu = 0)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  cat_smug and dog_smug
#> t = 1.7868, df = 52.936, p-value = 0.03985
#> alternative hypothesis: true difference in means is greater than 0
#> 95 percent confidence interval:
#>  0.06651642        Inf
#> sample estimates:
#> mean of x mean of y 
#>  6.035758  4.980895

I’ll use a one-sided, two-sample t-test against a mu of 0.2 (my smallest effect size of interest; SESOI) to assess falsification of my hypothesis that cat owners are smugger than dog owners.

# Falsification test for smugness: is the cat - dog difference significantly
# smaller than the SESOI of 0.2?
cat_smug <- with(simdat, smug[pet == "cat"])
dog_smug <- with(simdat, smug[pet == "dog"])
t.test(x = cat_smug, y = dog_smug, alternative = "less", mu = 0.2)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  cat_smug and dog_smug
#> t = 1.448, df = 52.936, p-value = 0.9232
#> alternative hypothesis: true difference in means is less than 0.2
#> 95 percent confidence interval:
#>      -Inf 2.043209
#> sample estimates:
#> mean of x mean of y 
#>  6.035758  4.980895

# Build the study object and register the first hypothesis ("happy") together
# with its two analyses, their criteria, and the evaluation rules.
# The analysis code refers to the data as `pet_data`; a dataset with that
# data_id has to be added to the study before the analyses are run.
study <- study("Simple Simulation Demo") %>%
  add_hypothesis("happy", "Dog owners will be happier than cat owners.") %>%
  # Main test: one-sided t-test of dog > cat against a difference of 0.
  add_analysis("happy_main", {
    dog_happy <- pet_data$happy[pet_data$pet == "dog"]
    cat_happy <- pet_data$happy[pet_data$pet == "cat"]
    t.test(dog_happy, cat_happy, mu = 0, alternative = "greater")
  }) %>%
  # NOTE(review): the criterion names no analysis, so it presumably applies to
  # the most recently added one -- confirm against the add_criterion() docs.
  add_criterion("happy_c", "p.value", "<", 0.05) %>%
  # SESOI test: one-sided t-test of dog - cat < 0.2.
  # The id was previously misspelled "happ_equiv"; it now follows the same
  # pattern as "happy_main" and the parallel "smug_equiv" analysis.
  add_analysis("happy_equiv", {
    dog_happy <- pet_data$happy[pet_data$pet == "dog"]
    cat_happy <- pet_data$happy[pet_data$pet == "cat"]
    t.test(dog_happy, cat_happy, mu = 0.2, alternative = "less")
  }) %>%
  add_criterion("happy_f", "p.value", "<", 0.05) %>%
  # Evaluation rules: which criterion corroborates / falsifies the hypothesis.
  add_eval(
    "corroboration",
    "happy_c",
    "The hypothesis will be corroborated if dog owners are significantly happier than cat owners."
  ) %>%
  add_eval(
    "falsification",
    "happy_f",
    "The hypothesis will be falsified if dog owners are significantly less than 0.2 units happier than cat owners."
  )

Add a second hypothesis, with all associated analyses, criteria, and evaluation rules.

# Extend the existing study object with the second hypothesis ("smug") and its
# analyses, criteria, and evaluation rules.
# The analysis code refers to the data as `pet_data`; a dataset with that
# data_id has to be added to the study before the analyses are run.
study <- study %>%
  add_hypothesis("smug", "Cat owners will be smugger than dog owners.") %>%
  # Main test: one-sided t-test of cat > dog against a difference of 0
  # (cat owners are passed as the first sample).
  add_analysis("smug_main", {
    dog_smug <- pet_data$smug[pet_data$pet == "dog"]
    cat_smug <- pet_data$smug[pet_data$pet == "cat"]
    t.test(cat_smug, dog_smug, mu = 0, alternative = "greater")
  }) %>%
  # NOTE(review): the criterion names no analysis, so it presumably applies to
  # the most recently added one -- confirm against the add_criterion() docs.
  add_criterion("smug_c", "p.value", "<", 0.05) %>%
  # SESOI test: one-sided t-test of cat - dog < 0.2.
  add_analysis("smug_equiv", {
    dog_smug <- pet_data$smug[pet_data$pet == "dog"]
    cat_smug <- pet_data$smug[pet_data$pet == "cat"]
    t.test(cat_smug, dog_smug, mu = 0.2, alternative = "less")
  }) %>%
  add_criterion("smug_f", "p.value", "<", 0.05) %>%
  # Evaluation rules: which criterion corroborates / falsifies the hypothesis.
  add_eval("corroboration", "smug_c", "The hypothesis will be corroborated if cat owners are significantly smugger than dog owners.") %>%
  add_eval("falsification", "smug_f", "The hypothesis will be falsified if cat owners are significantly less than 0.2 units smugger than dog owners.")

Now we can simulate a dataset. Make sure to give it the same data_id that you used to reference it in the analysis.

# Attach one simulated dataset to the study and run the registered analyses.
# The data_id must match the name the analysis code uses (`pet_data`).
simstudy <- study %>%
  add_sim_data(
    data_id = "pet_data",
    within = list(emotion = c("happy", "smug")),
    between = list(pet = c("dog", "cat")),
    mu = c(10, 5, 9, 6),
    sd = 2,
    n = 30 # in each between-subject cell
  ) %>%
  study_analyse()
#> id set to dataType string
#> pet set to dataType string
#> happy set to dataType float
#> smug set to dataType float
#> Hypothesis happy: Dog owners will be happier than cat owners.
#> 
#> Criterion happy_c:
#> * p.value < 0.05 is FALSE
#> * p.value = 0.131
#> 
#> Criterion happy_f:
#> * p.value < 0.05 is FALSE
#> * p.value = 0.774
#> 
#> Conclusion: inconclusive
#> * Corroborate (happy_c): FALSE
#> * Falsify (happy_f): FALSE
#> 
#> Hypothesis smug: Cat owners will be smugger than dog owners.
#> 
#> Criterion smug_c:
#> * p.value < 0.05 is FALSE
#> * p.value = 0.201
#> 
#> Criterion smug_f:
#> * p.value < 0.05 is FALSE
#> * p.value = 0.687
#> 
#> Conclusion: inconclusive
#> * Corroborate (smug_c): FALSE
#> * Falsify (smug_f): FALSE

Use the study_power() function to simulate the dataset repeatedly, run the analyses on each simulated dataset, and return the percent of datasets that lead to each possible conclusion for each hypothesis. Set the rep argument to a large number of replications (here, 1000) to calculate the power of your study. This is especially helpful for studies with complex analyses or evaluation criteria.

# Run the analyses on 1000 simulated datasets and report the percent of them
# that lead to each conclusion for each hypothesis.
simstudy <- simstudy %>%
  study_power(rep = 1000)
#> Simulating Datasets...
#> Running Analyses...
#> Evaluating Hypotheses...
#> Hypothesis happy
#>       corroboration: 62.3%
#>       falsification: 0.1%
#>       inconclusive:  37.6%
#> Hypothesis smug
#>       corroboration: 59.4%
#>       falsification: 0.1%
#>       inconclusive:  40.5%

Use the get_power function to get the results in a list format. Set values to TRUE to get the individual values for the analysis results.

# Retrieve the power results as a list; values = TRUE also returns the
# individual values of the analysis results.
power <- get_power(simstudy, values = TRUE)

# Power for each hypothesis (corroboration, falsification, or inconclusive),
# extracted with exact-match `[[` indexing.
hypothesis_power <- power[["power"]]
happy_power <- hypothesis_power[["happy"]]
smug_power <- hypothesis_power[["smug"]]

# Values for specific analysis results: the p-values of the two main tests.
happy_c <- power[["results"]][["happy_main"]][["p.value"]]
smug_c <- power[["results"]][["smug_main"]][["p.value"]]