### Main analysis of message testing experiment ### Scott Hershberger and Steven Moen ### begun 11-8-23, last updated 2-9-24 # Time the code tic <- Sys.time() ##### Read Me and User Inputs ##### # This code runs successfully on a 2020 M1 MacBook Pro with 16 GB of ram # in about 30 seconds. Please see exact system information at the bottom of this code, computed with sessionInfo() # You will need to update the working directory to where the data is stored. It's recommended to have # all the data input files in this folder together. Note that all of the exports from this code are saved in this folder. # Please update the filepath below working_directory_filepath = "~/Documents/Message_Testing_02092024/" ###### Libraries and Setup ##### setwd(working_directory_filepath) library(ggplot2) library(magrittr) library(faraway) library(emmeans) library(boot) library(dplyr) library("gridpattern") library("ggpattern") ## creating a function to turn aggregate dataframe into deaggregated (individual-level) data deaggregate_data <- function(aggregate_data){ # Initialize a list sto_list <- list() # Populate the data frame for (i in (1:24)) { vec_age <- as.data.frame(c(rep(aggregate_data$Age[i], aggregate_data$Impressions[i]))) vec_gender <- c(rep(aggregate_data$Gender[i], aggregate_data$Impressions[i])) vec_frame <- c(rep(aggregate_data$Frame[i], aggregate_data$Impressions[i])) vec_outcome <- c(rep(1,aggregate_data$Clicks[i]), rep(0, aggregate_data$Impressions[i] - aggregate_data$Clicks[i])) # Combine into a matrix out_mat <- cbind(vec_age, vec_gender, vec_frame, vec_outcome) # Store in a list sto_list[[i]] <- out_mat } # Convert into a matrix dataset <- sto_list[[1]] for (i in 2:24) { dataset <- rbind(dataset, sto_list[[i]]) } # Convert to a data frame dataset <- as.data.frame(dataset) # Change the names of the dataset colnames(dataset) <- c("Age", "Gender", "Frame", "Click") # Convert columns into proper data types dataset$Click <- as.numeric(dataset$Click) dataset$Age <- as.factor(dataset$Age) dataset$Gender <- as.factor(dataset$Gender) dataset$Frame <- as.factor(dataset$Frame) return(dataset) } ## format of aggregate data for input into deaggregate_data should have 5 columns: # aggregate_data[,1] is age # aggregate_data[,2] is gender # aggregate_data[,3] is frame # aggregate_data[,4] is the reach (denominator of the rate) # aggregate_data[,5] is the outcome variable (e.g., clicks) (numerator of the rate) #### loading and cleaning data #### message_test <- read.csv("Message-test.csv") ## removing unnecessary columns message_test <- message_test %>% select(-one_of('Reporting.starts', 'Reporting.ends', 'Cost.per.follow.or.like')) ## renaming columns to be easier to use colnames(message_test) <- c("ad_set", "dollars_spent", "impressions", "impressions_CPM", "reach", "reach_CPM", "frequency", "link_clicks_unique", "link_clicks_unique_CTR", "link_clicks_unique_CPC", "all_clicks_unique", "all_clicks_unique_CTR", "all_clicks_unique_CPC", "link_clicks_total", "link_clicks_total_CTR", "link_clicks_total_CPC", "all_clicks_total", "all_clicks_total_CTR", "all_clicks_total_CPC", "engagement_page", "engagement_post", "reactions", "comments", "shares", "saves", "follows_likes", "engagement_page_CP1", "engagement_post_CP1") ## replacing the percentage CTRs with raw proportion CTRs (max value 1) message_test$link_clicks_unique_CTR <- message_test$link_clicks_unique_CTR/100 message_test$all_clicks_unique_CTR <- message_test$all_clicks_unique_CTR/100 message_test$link_clicks_total_CTR <- message_test$link_clicks_total_CTR/100 message_test$all_clicks_total_CTR <- message_test$all_clicks_total_CTR/100 ## replacing NA values for comments, shares, saves, and follows/likes with 0 message_test <- message_test %>% replace(is.na(.), 0) ## adding columns for frame, age, and gender message_test$frame <- NA message_test$gender <- NA message_test$age <- NA for (i in 1:nrow(message_test)) { ## filling frame column if (grepl("community", message_test$ad_set[i])) { message_test$frame[i] <- "Community" } else if (grepl("culinary", message_test$ad_set[i])) { message_test$frame[i] <- "Food" } else if (grepl("nature", message_test$ad_set[i])) { message_test$frame[i] <- "Nature" } else { message_test$frame[i] <- "Physicality" } ## filling age column if (grepl("Young", message_test$ad_set[i])) { message_test$age[i] <- "18-34" } else if (grepl("Middle", message_test$ad_set[i])) { message_test$age[i] <- "35-54" } else { message_test$age[i] <- "55+" } ## filling gender column if (grepl("female", message_test$ad_set[i])) { message_test$gender[i] <- "Female" } else { message_test$gender[i] <- "Male" } } ## double-checking the above for loop message_test[, c("ad_set", "age", "gender", "frame")] ## making age, gender, and frame categorical variables message_test$age <- as.factor(message_test$age) message_test$gender <- as.factor(message_test$gender) message_test$frame <- as.factor(message_test$frame) message_test$ad_set <- as.factor(message_test$ad_set) ## making integer variables integer variables message_test$impressions <- as.integer(message_test$impressions) message_test$reach <- as.integer(message_test$reach) message_test$link_clicks_unique <- as.integer(message_test$link_clicks_unique) message_test$link_clicks_total <- as.integer(message_test$link_clicks_total) message_test$all_clicks_unique <- as.integer(message_test$all_clicks_unique) message_test$all_clicks_total <- as.integer(message_test$all_clicks_total) message_test$engagement_page <- as.integer(message_test$engagement_page) message_test$engagement_post <- as.integer(message_test$engagement_post) message_test$reactions <- as.integer(message_test$reactions) message_test$comments <- as.integer(message_test$comments) message_test$shares <- as.integer(message_test$shares) message_test$saves <- as.integer(message_test$saves) message_test$follows_likes <- as.integer(message_test$follows_likes) ## double-checking that all variables have the right class lapply(message_test, class) summary(message_test) #### analysis for unique link clicks (RQ1) #### ## creating deaggregate dataset for unique link clicks unique_link_clicks_aggregated <- message_test[, c("age","gender","frame","reach","link_clicks_unique")] colnames(unique_link_clicks_aggregated) <- c("Age", "Gender", "Frame", "Impressions", "Clicks") unique_link_clicks <- deaggregate_data(unique_link_clicks_aggregated) ## double-checking that the deaggregate function worked correctly sum(unique_link_clicks$Click[unique_link_clicks$Age=="18-34" & unique_link_clicks$Gender=="Female" & unique_link_clicks$Frame=="Community"]) sum(unique_link_clicks$Click[unique_link_clicks$Age=="55+" & unique_link_clicks$Gender=="Male" & unique_link_clicks$Frame=="Food"]) length(unique_link_clicks$Click[unique_link_clicks$Age=="35-54" & unique_link_clicks$Gender=="Female" & unique_link_clicks$Frame=="Nature"]) sum(unique_link_clicks$Click[unique_link_clicks$Age=="18-34" & unique_link_clicks$Gender=="Male" & unique_link_clicks$Frame=="Physicality"]) length(unique_link_clicks$Click[unique_link_clicks$Age=="18-34" & unique_link_clicks$Gender=="Female" & unique_link_clicks$Frame=="Community"]) sum(unique_link_clicks_aggregated$Impressions) ## running GLMs Click_glm_age_gender_frame <- glm(Click ~ Age*Gender*Frame, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_age_gender_frame) Click_glm_age_gender <- glm(Click ~ Age*Gender, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_age_gender) Click_glm_age_frame <- glm(Click ~ Age*Frame, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_age_frame) Click_glm_gender_frame <- glm(Click ~ Gender*Frame, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_gender_frame) Click_glm_age_gender_frame_no_ag_interaction <- glm(Click ~ Age*Frame + Gender*Frame, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_age_gender_frame_no_ag_interaction) Click_glm_age_gender_frame_no_af_interaction <- glm(Click ~ Age*Gender + Frame*Gender, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_age_gender_frame_no_af_interaction) Click_glm_age_gender_frame_no_fg_interaction <- glm(Click ~ Age*Gender + Age*Frame, family = binomial(link = "logit"), data = unique_link_clicks) summary(Click_glm_age_gender_frame_no_fg_interaction) ##### chi-square deviance tests ##### # testing significance of frame anova(Click_glm_age_gender_frame, Click_glm_age_gender, test = "Chi") # testing significance of gender anova(Click_glm_age_gender_frame, Click_glm_age_frame, test = "Chi") # testing significance of age anova(Click_glm_age_gender_frame, Click_glm_gender_frame, test = "Chi") # testing significance of age and gender interaction terms anova(Click_glm_age_gender_frame, Click_glm_age_gender_frame_no_ag_interaction, test = "Chi") # testing significance of age and frame interaction terms anova(Click_glm_age_gender_frame, Click_glm_age_gender_frame_no_af_interaction, test = "Chi") # testing significance of frame and gender interaction terms anova(Click_glm_age_gender_frame, Click_glm_age_gender_frame_no_fg_interaction, test = "Chi") ##### doing pairwise comparisons and generating confidence intervals for CTRs ##### ## controlling for gender and frame click_marginals_age <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Age, type="response", adjust="sidak") click_marginals_age click_marginals_age$contrasts %>% summary(infer=TRUE) ## reordering the categories to report comparisons with OR>1 instead of OR<1 click_marginals_age <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Age, type="response", adjust="sidak", at = list(Age = c("55+", "35-54", "18-34"))) click_marginals_age click_marginals_age$contrasts %>% summary(infer=TRUE) click_marginals_age_data <- as.data.frame(click_marginals_age$emmeans) ## controlling for age and frame click_marginals_gender <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Gender, type="response", adjust="sidak") click_marginals_gender click_marginals_gender$contrasts %>% summary(infer=TRUE) click_marginals_gender_data <- as.data.frame(click_marginals_gender$emmeans) ## controlling for age and gender click_marginals_frame <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Frame, type="response", adjust="sidak") click_marginals_frame click_marginals_frame$contrasts %>% summary(infer=TRUE) click_marginals_frame_data <- as.data.frame(click_marginals_frame$emmeans) ## reordering the categories to report comparisons for which OR>1 rather than OR<1 click_marginals_frame <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Frame, type="response", adjust="sidak", at=list(Frame=c("Physicality", "Community", "Food", "Nature"))) click_marginals_frame click_marginals_frame$contrasts %>% summary(infer=TRUE) ## controlling for frame click_marginals_age_and_gender <- emmeans(Click_glm_age_gender_frame, specs=pairwise~Age*Gender, type="response", adjust="sidak") click_marginals_age_and_gender click_marginals_age_and_gender$contrasts %>% summary(infer=TRUE) ## the above code for age and gender did all 15 pairwise comparisons, but we just want ## a specific 9 comparisons, so the following code does just those 9 comparisons with the ## proper correction to p-values and confidence intervals. youngf <- c(1,0,0,0,0,0) middlef <- c(0,1,0,0,0,0) oldf <- c(0,0,1,0,0,0) youngm <- c(0,0,0,1,0,0) middlem <- c(0,0,0,0,1,0) oldm <- c(0,0,0,0,0,1) contrast(click_marginals_age_and_gender, method=list("middlef/youngf"=middlef-youngf, "oldf/youngf"=oldf-youngf, "oldf/middlef"=oldf-middlef, "middlem/youngm"=middlem-youngm, "oldm/youngm"=oldm-youngm, "oldm/middlem"=oldm-middlem, "youngf/youngm"=youngf-youngm, "middlef/middlem"=middlef-middlem, "oldf/oldm"=oldf-oldm), adjust="sidak") %>% summary(infer=TRUE) click_marginals_age_and_gender_data <- as.data.frame(click_marginals_age_and_gender$emmeans) ## controlling for gender click_marginals_age_and_frame <- emmeans(Click_glm_age_gender_frame, specs=pairwise~Age*Frame, type="response", adjust="sidak") click_marginals_age_and_frame click_marginals_age_and_frame$contrasts %>% summary(infer=TRUE) click_marginals_age_and_frame_data <- as.data.frame(click_marginals_age_and_frame$emmeans) ## the above code does all 66 age/frame comparison. The below code does just the 18 desired comparisons. ## controlling for gender, comparing frames within age groups click_frame_within_age_comparisons <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Frame|Age, type="response", adjust="sidak") click_frame_within_age_comparisons click_frame_within_age_comparisons$contrasts %>% summary(infer = TRUE) ## changing order of reference groups to report most of the comparisons with OR>1 instead of OR<1 click_frame_within_age_comparisons <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Frame|Age, type="response", adjust="sidak",at=list(Frame=c("Food", "Physicality", "Nature", "Community"))) click_frame_within_age_comparisons click_frame_within_age_comparisons$contrasts %>% summary(infer = TRUE) ## Combining the comparisons so the Sidak adjustment is for 18 comparisons instead of each 6 separately click_frame_within_age_comparisons$contrasts %>% rbind(adjust="sidak") %>% summary(infer=TRUE) ## changing order of reference groups to report the rest of the comparisons with OR>1 instead of OR<1 click_frame_within_age_comparisons <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Frame|Age, type="response", adjust="sidak",at=list(Frame=c("Physicality", "Food", "Nature", "Community"))) click_frame_within_age_comparisons click_frame_within_age_comparisons$contrasts %>% summary(infer = TRUE) ## Combining the comparisons so the Sidak adjustment is for 18 comparisons instead of each 6 separately click_frame_within_age_comparisons$contrasts %>% rbind(adjust="sidak") %>% summary(infer=TRUE) ## controlling for age click_marginals_frame_and_gender <- emmeans(Click_glm_age_gender_frame, specs=pairwise~Frame*Gender, type="response", adjust="sidak") click_marginals_frame_and_gender click_marginals_frame_and_gender_data <- as.data.frame(click_marginals_frame_and_gender$emmeans) ## the above does all 28 pairwise comparisons, but we just want to compare frames within each gender. ## controlling for age, comparing frames within each gender click_frame_within_gender_comparisons <- emmeans(Click_glm_age_gender_frame, specs=pairwise ~ Frame|Gender, type="response", adjust="sidak", at=list(Frame=c("Physicality", "Food", "Community", "Nature"))) click_frame_within_gender_comparisons click_frame_within_gender_comparisons$contrasts %>% summary(infer=TRUE) ## combining the comparisons so that the Sidak adjustment is for 12 comparisons instead of each 6 separately click_frame_within_gender_comparisons$contrasts %>% rbind(adjust="sidak") %>% summary(infer=TRUE) ## model predictions for all 24 conditions click_marginals_all <- emmeans(Click_glm_age_gender_frame, specs=pairwise~Frame*Gender*Age, type="response", adjust="sidak") click_marginals_all click_marginals_all_data <- as.data.frame(click_marginals_all$emmeans) ##### ggplots of emmeans results for unique clicks model #### ###### setting general aesthetics of graphs (also used for reactions graphs) ###### # setting a dodge parameter to control width and placement of error bars dodge <- position_dodge(width=.9) # setting color schemes frame_colors <- c(rgb(167/255,108/255,173/255), rgb(254/255,225/255,0/255), rgb(108/255,190/255,73/255), rgb(161/255,50/255,34/255)) average_color <- "grey50" gender_colors <- c(rgb(117/255,234/255,182/255), rgb(53/255,136/255,209/255)) # setting y limits and labels of click graphs ylim_click <- c(0, 0.027*100) ylab_click <- "Unique link click-through rate (%)" # setting width of error bar lines linewidth_CI <- 0.7 # setting names of frames with capitalization frame_labels <- c("Community","Food","Nature","Physicality") # modifing the theme theme_mine <- theme(panel.grid.major.x = element_blank(), panel.grid.major.y = element_line(color="grey75"), axis.ticks.x = element_blank(), axis.ticks.y = element_line(color="grey75"), panel.grid.minor.y = element_line(color="grey75"), panel.background = element_rect(fill="white", color="grey90"), panel.spacing = unit(1, "lines"), plot.title = element_text(hjust=0.5) ) ## adding age marginals to age and frame marginals and adding gender marginals to gender and frame marginals click_marginals_age_data$Frame <- "All" click_graph_age_frame_data <- rbind(click_marginals_age_and_frame_data, click_marginals_age_data) click_marginals_gender_data$Frame <- "All" click_graph_gender_frame_data <- rbind(click_marginals_frame_and_gender_data, click_marginals_gender_data) ## marginal predictions for frame click_plot_frame <- ggplot(data=click_marginals_frame_data, aes(x=Frame, fill=Frame)) + geom_col(aes(y=prob*100)) + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for CTRs \nbased on frame") + scale_y_continuous(limits=ylim_click, expand=c(0,0)) + ylab(ylab_click) + scale_fill_manual(values=frame_colors) + theme_mine + theme(legend.position = "none") click_plot_frame ## marginal predictions for age and frame click_plot_age_frame <- ggplot(data=click_graph_age_frame_data, aes(x=Age, fill=Frame)) + geom_col_pattern(aes(y=prob*100, pattern=Frame), pattern_color=rgb(0,0,0,0), position="dodge") + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for CTRs \nbased on age and frame") + scale_y_continuous(limits=ylim_click, expand=c(0,0)) + ylab(ylab_click) + scale_fill_manual(values=c(frame_colors, average_color)) + theme_mine + scale_pattern_manual(values=c(Community="none",Food="none",Nature="none",Physicality="none", All="stripe")) + theme(legend.position="none") click_plot_age_frame ## marginal predictions for frame and gender click_plot_gender_frame <- ggplot(data=click_graph_gender_frame_data, aes(x=Gender, fill=Frame)) + geom_col_pattern(aes(y=prob*100, pattern=Frame), pattern_color=rgb(0,0,0,0), position="dodge") + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for CTRs \nbased on gender and frame") + scale_y_continuous(limits=ylim_click, expand=c(0,0)) + ylab(NULL) + scale_fill_manual(values=c(frame_colors, average_color)) + theme_mine + scale_pattern_manual(values=c(Community="none",Food="none",Nature="none",Physicality="none", All="stripe")) click_plot_gender_frame ## marginal predictions for age and gender click_plot_age_gender <- ggplot(data=click_marginals_age_and_gender_data, aes(x=Age, fill=Gender)) + geom_col(aes(y=prob*100), position="dodge") + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for CTRs \nbased on age and gender") + scale_y_continuous(limits=ylim_click, expand=c(0,0)) + ylab(ylab_click) + scale_fill_manual(values=gender_colors) + theme_mine click_plot_age_gender ## predictions for full model click_plot_full <- ggplot(data=click_marginals_all_data, aes(x=Age, fill=Frame)) + geom_col(aes(y=prob*100), position="dodge") + facet_wrap(~Gender) + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for CTRs") + scale_y_continuous(limits=ylim_click, expand=c(0,0)) + ylab(ylab_click) + scale_fill_manual(values=frame_colors) + theme_mine click_plot_full ###### saving figures as .png - make sure to change filename as needed ###### ggsave( "click_frame_v3.png" , plot = click_plot_frame, scale = 1, width = 3.25, height = 3.25, units = "in", dpi=320 ) ggsave( "click_age_gender_v3.png" , plot = click_plot_age_gender, scale = 1, width = 3.25, height = 3.25, units = "in", dpi=320 ) ggsave( "click_age_frame_v3a.png" , plot = click_plot_age_frame, scale = 1, width = 4, height = 3.25, units = "in", dpi=320 ) ggsave( "click_gender_frame_v3a.png" , plot = click_plot_gender_frame, scale = 1, width = 2.5, height = 3.25, units = "in", dpi=320 ) ggsave( "click_full_v3.png" , plot = click_plot_full, scale = 1, width = 6.5, height = 3.25, units = "in", dpi=320 ) ###### saving figures as .eps - make sure to change filename as needed ###### ggsave( "click_frame_v3.eps" , plot = click_plot_frame, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "click_age_frame_v3.eps" , plot = click_plot_age_frame, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "click_age_gender_v3.eps" , plot = click_plot_age_gender, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "click_gender_frame_v3.eps" , plot = click_plot_gender_frame, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "click_full_v3.eps" , plot = click_plot_full, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) #### analysis for reactions (RQ2) #### ## creating deaggregate dataset for reactions reactions_aggregated <- message_test[, c("age","gender","frame","reach","reactions")] colnames(reactions_aggregated) <- c("Age", "Gender", "Frame", "Impressions", "Clicks") reactions <- deaggregate_data(reactions_aggregated) ## double-checking that the deaggregate function worked correctly sum(reactions$Click[reactions$Age=="18-34" & reactions$Gender=="Male" & reactions$Frame=="Nature"]) sum(reactions$Click[reactions$Age=="55+" & reactions$Gender=="Female" & reactions$Frame=="Food"]) length(reactions$Click[reactions$Age=="35-54" & reactions$Gender=="Female" & reactions$Frame=="Nature"]) sum(reactions$Click[reactions$Age=="18-34" & reactions$Gender=="Male" & reactions$Frame=="Physicality"]) length(reactions$Click[reactions$Age=="18-34" & reactions$Gender=="Female" & reactions$Frame=="Community"]) sum(reactions_aggregated$Impressions) ## running GLMs Reactions_glm_age_gender_frame <- glm(Click ~ Age*Gender*Frame, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_age_gender_frame) Reactions_glm_age_gender <- glm(Click ~ Age*Gender, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_age_gender) Reactions_glm_age_frame <- glm(Click ~ Age*Frame, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_age_frame) Reactions_glm_gender_frame <- glm(Click ~ Gender*Frame, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_gender_frame) Reactions_glm_age_gender_frame_no_ag_interaction <- glm(Click ~ Age*Frame + Gender*Frame, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_age_gender_frame_no_ag_interaction) Reactions_glm_age_gender_frame_no_af_interaction <- glm(Click ~ Age*Gender + Frame*Gender, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_age_gender_frame_no_af_interaction) Reactions_glm_age_gender_frame_no_fg_interaction <- glm(Click ~ Age*Gender + Age*Frame, family = binomial(link = "logit"), data = reactions) summary(Reactions_glm_age_gender_frame_no_fg_interaction) ##### chi-square deviance tests ##### # testing significance of frame anova(Reactions_glm_age_gender_frame, Reactions_glm_age_gender, test = "Chi") # testing significance of gender anova(Reactions_glm_age_gender_frame, Reactions_glm_age_frame, test = "Chi") # testing significance of age anova(Reactions_glm_age_gender_frame, Reactions_glm_gender_frame, test = "Chi") # testing significance of age and gender interaction terms anova(Reactions_glm_age_gender_frame, Reactions_glm_age_gender_frame_no_ag_interaction, test = "Chi") # testing significance of age and frame interaction terms anova(Reactions_glm_age_gender_frame, Reactions_glm_age_gender_frame_no_af_interaction, test = "Chi") # testing significance of frame and gender interaction terms anova(Reactions_glm_age_gender_frame, Reactions_glm_age_gender_frame_no_fg_interaction, test = "Chi") ##### doing pairwise comparisons and generating confidence intervals for reaction rates ##### ## controlling for gender and frame reactions_marginals_age <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Age, type="response", adjust="sidak") reactions_marginals_age reactions_marginals_age$contrasts %>% summary(infer=TRUE) ## reordering the categories to report comparisons with OR>1 instead of OR<1 reactions_marginals_age <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Age, type="response", adjust="sidak", at = list(Age = c("55+", "35-54", "18-34"))) reactions_marginals_age reactions_marginals_age$contrasts %>% summary(infer=TRUE) reactions_marginals_age_data <- as.data.frame(reactions_marginals_age$emmeans) ## controlling for age and frame reactions_marginals_gender <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Gender, type="response", adjust="sidak") reactions_marginals_gender reactions_marginals_gender$contrasts %>% summary(infer=TRUE) reactions_marginals_gender_data <- as.data.frame(reactions_marginals_gender$emmeans) ## controlling for age and gender reactions_marginals_frame <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Frame, type="response", adjust="sidak") reactions_marginals_frame reactions_marginals_frame$contrasts %>% summary(infer=TRUE) reactions_marginals_frame_data <- as.data.frame(reactions_marginals_frame$emmeans) ## reordering the categories to report comparisons with OR>1 rather than OR<1 reactions_marginals_frame <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Frame, type="response", adjust="sidak", at=list(Frame=c("Physicality", "Community", "Nature", "Food"))) reactions_marginals_frame reactions_marginals_frame$contrasts %>% summary(infer=TRUE) ## controlling for frame reactions_marginals_age_and_gender <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise~Age*Gender, type="response", adjust="sidak") reactions_marginals_age_and_gender reactions_marginals_age_and_gender$contrasts %>% summary(infer=TRUE) ## the above code for age and gender did all 15 pairwise comparisons, but we just want ## a specific 9 comparisons, so the following code does just those 9 comparisons with the ## proper correction to p-values and confidence intervals. youngf <- c(1,0,0,0,0,0) middlef <- c(0,1,0,0,0,0) oldf <- c(0,0,1,0,0,0) youngm <- c(0,0,0,1,0,0) middlem <- c(0,0,0,0,1,0) oldm <- c(0,0,0,0,0,1) contrast(reactions_marginals_age_and_gender, method=list("middlef/youngf"=middlef-youngf, "oldf/youngf"=oldf-youngf, "oldf/middlef"=oldf-middlef, "middlem/youngm"=middlem-youngm, "oldm/youngm"=oldm-youngm, "oldm/middlem"=oldm-middlem, "youngf/youngm"=youngf-youngm, "middlef/middlem"=middlef-middlem, "oldf/oldm"=oldf-oldm), adjust="sidak") %>% summary(infer=TRUE) reactions_marginals_age_and_gender_data <- as.data.frame(reactions_marginals_age_and_gender$emmeans) ## controlling for gender reactions_marginals_age_and_frame <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise~Age*Frame, type="response", adjust="sidak") reactions_marginals_age_and_frame reactions_marginals_age_and_frame$contrasts %>% summary(infer=TRUE) reactions_marginals_age_and_frame_data <- as.data.frame(reactions_marginals_age_and_frame$emmeans) ## the above code does all 66 age/frame comparison. The below code does just the 18 desired comparisons. ## controlling for gender, comparing frames within age groups reactions_frame_within_age_comparisons <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Frame|Age, type="response", adjust="sidak") reactions_frame_within_age_comparisons reactions_frame_within_age_comparisons$contrasts %>% summary(infer = TRUE) ## changing order of reference groups to report comparisons with OR>1 instead of OR<1 reactions_frame_within_age_comparisons <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Frame|Age, type="response", adjust="sidak",at=list(Frame=c("Nature", "Physicality", "Food", "Community"))) reactions_frame_within_age_comparisons reactions_frame_within_age_comparisons$contrasts %>% summary(infer = TRUE) ## combining the comparisons so the Sidak adjustment is for 18 comparisons instead of each 6 separately reactions_frame_within_age_comparisons$contrasts %>% rbind(adjust="sidak") %>% summary(infer=TRUE) ## controlling for age reactions_marginals_frame_and_gender <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise~Frame*Gender, type="response", adjust="sidak") reactions_marginals_frame_and_gender reactions_marginals_frame_and_gender_data <- as.data.frame(reactions_marginals_frame_and_gender$emmeans) ## the above does all 28 pairwise comparisons, but we just want to compare frames within each gender. ## controlling for age, comparing frames within each gender reactions_frame_within_gender_comparisons <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise ~ Frame|Gender, type="response", adjust="sidak", at=list(Frame=c("Physicality", "Food", "Community", "Nature"))) reactions_frame_within_gender_comparisons reactions_frame_within_gender_comparisons$contrasts %>% summary(infer=TRUE) ## combining the comparisons so the Sidak adjustment is for 12 comparisons instead of each 6 separately reactions_frame_within_gender_comparisons$contrasts %>% rbind(adjust="sidak") %>% summary(infer=TRUE) ## model predictions for all 24 conditions reactions_marginals_all <- emmeans(Reactions_glm_age_gender_frame, specs=pairwise~Frame*Gender*Age, type="response", adjust="sidak") reactions_marginals_all reactions_marginals_all_data <- as.data.frame(reactions_marginals_all$emmeans) ##### ggplots of emmeans results for reactions model ##### # setting a dodge parameter to control width and placement of error bars dodge <- position_dodge(width=.9) # setting y limits and label of reactions graphs ylim_reactions_low <- c(0, 0.0065*100) ylim_reactions_high <- c(0, 0.0115*100) ylab_reactions <- "Reaction rate (%)" ## adding age marginals to age and frame marginals and adding gender marginals to gender and frame marginals reactions_marginals_age_data$Frame <- "All" reactions_graph_age_frame_data <- rbind(reactions_marginals_age_and_frame_data, reactions_marginals_age_data) reactions_marginals_gender_data$Frame <- "All" reactions_graph_gender_frame_data <- rbind(reactions_marginals_frame_and_gender_data, reactions_marginals_gender_data) ## marginal predictions for frame reactions_plot_frame <- ggplot(data=reactions_marginals_frame_data, aes(x=Frame)) + geom_col(aes(y=prob*100, fill=Frame)) + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for reaction rates \nbased on frame") + theme_mine + scale_fill_manual(values=frame_colors) + scale_y_continuous(limits=ylim_reactions_low, expand=c(0,0)) + ylab(ylab_reactions) + theme(legend.position = "none") reactions_plot_frame ## marginal predictions for age and gender reactions_plot_age_gender <- ggplot(data=reactions_marginals_age_and_gender_data, aes(x=Age, fill=Gender)) + geom_col(aes(y=prob*100), position="dodge") + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for reaction rates \nbased on age and gender") + theme_mine + scale_fill_manual(values=gender_colors) + scale_y_continuous(limits=ylim_reactions_low, expand=c(0,0)) + ylab(ylab_reactions) reactions_plot_age_gender ## marginal predictions for age and frame reactions_plot_age_frame <- ggplot(data=reactions_graph_age_frame_data, aes(x=Age, fill=Frame)) + geom_col_pattern(aes(y=prob*100, pattern=Frame), pattern_color=rgb(0,0,0,0), position="dodge") + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for reaction rates \nbased on age and frame") + theme_mine + scale_fill_manual(values=c(frame_colors,average_color)) + scale_y_continuous(limits=c(0,0.9), expand=c(0,0), breaks=c(0, .2, .4, .6, .8)) + ylab(ylab_reactions) + scale_pattern_manual(values=c(Community="none",Food="none",Nature="none",Physicality="none", All="stripe")) + theme(legend.position = "none") reactions_plot_age_frame ## marginal predictions for frame and gender reactions_plot_gender_frame <- ggplot(data=reactions_graph_gender_frame_data, aes(x=Gender, fill=Frame)) + geom_col_pattern(aes(y=prob*100, pattern=Frame), pattern_color=rgb(0,0,0,0), position="dodge") + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for reaction rates \nbased on gender and frame") + theme_mine + scale_fill_manual(values=c(frame_colors,average_color)) + scale_y_continuous(limits=c(0,0.9), expand=c(0,0), breaks=c(0, .2, .4, .6, .8)) + ylab(NULL) + scale_pattern_manual(values=c(Community="none",Food="none",Nature="none",Physicality="none", All="stripe")) reactions_plot_gender_frame ## predictions for full model reactions_plot_full <- ggplot(data=reactions_marginals_all_data, aes(x=Age, fill=Frame)) + geom_col(aes(y=prob*100), position="dodge") + facet_wrap(~Gender) + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), linewidth=linewidth_CI, position=dodge, width=.25) + ggtitle("Model predictions for reaction rates") + theme_mine + scale_fill_manual(values=c(frame_colors,average_color)) + scale_y_continuous(limits=ylim_reactions_high, expand=c(0,0), breaks=c(0, .2, .4, .6, .8, 1)) + ylab(ylab_reactions) reactions_plot_full ###### saving figures as .png - make sure to change filename as needed ###### ggsave( "reactions_frame_v3.png" , plot = reactions_plot_frame, scale = 1, width = 3.25, height = 3.25, units = "in", dpi=320 ) ggsave( "reactions_age_gender_v3.png" , plot = reactions_plot_age_gender, scale = 1, width = 3.25, height = 3.25, units = "in", dpi=320 ) ggsave( "reactions_age_frame_v3.png" , plot = reactions_plot_age_frame, scale = 1, width = 4, height = 3.25, units = "in", dpi=320 ) ggsave( "reactions_gender_frame_v3.png" , plot = reactions_plot_gender_frame, scale = 1, width = 2.5, height = 3.25, units = "in", dpi=320 ) ggsave( "reactions_full_v3.png" , plot = reactions_plot_full, scale = 1, width = 6.5, height = 3.25, units = "in", dpi=320 ) ###### saving figures as .eps - make sure to change filename as needed ###### ggsave( "reactions_frame_v3.eps" , plot = reactions_plot_frame, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "reactions_age_frame_v3.eps" , plot = reactions_plot_age_frame, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "reactions_age_gender_v3.eps" , plot = reactions_plot_age_gender, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "reactions_gender_frame_v3.eps" , plot = reactions_plot_gender_frame, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) ggsave( "reactions_full_v3.eps" , plot = reactions_plot_full, device = "eps", width = 6.5, height = 3.25, units = "in", dpi=1200 ) #### scatter plot for clicks and reactions #### # making a dataset with just age, gender, frame, and predicted CTR scatter_data_clicks <- click_marginals_all_data[,1:4] # changing column names colnames(scatter_data_clicks) <- c("Frame", "Gender", "Age","click_prob") # adding a column for predicted reaction rates scatter_data <- cbind(scatter_data_clicks, reactions_marginals_all_data$prob) # updating column names again colnames(scatter_data) <- c("Frame", "Gender", "Age","click_prob","reaction_prob") # normalizing the click and reaction rates based on the average across all 24 conditions scatter_data$click_prob <- scatter_data$click_prob/0.0157 scatter_data$reaction_prob <- scatter_data$reaction_prob/0.00322 # manually setting the list of shapes to use for each age/gender/frame combination shapes_list <- c(16,16,16,16,1,1,1,1, 17,17,17,17,2,2,2,2, 15,15,15,15,0,0,0,0) shapes_list <- as.integer(shapes_list) # creating the scatter plot scatter_plot <- ggplot(data=scatter_data, mapping=aes(x=reaction_prob, y=click_prob, color=Frame, size=Age), shape=shapes_list) + geom_point(shape=shapes_list) + scale_color_manual(values=frame_colors) + scale_y_continuous(limits = c(0,1.6), expand=c(0,0), breaks = c(0,0.5,1,1.5)) + scale_x_continuous(limits = c(0,3), expand=c(0,0), breaks=seq(0,3,.5)) + theme_mine + theme(panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), axis.ticks.x = element_line(color="grey75")) + geom_hline(yintercept = 1, color="grey60") + geom_vline(xintercept = 1, color="grey60") + geom_abline(intercept=0, slope=1, linetype="dashed") + ggtitle("Normalized CTRs versus reaction rates \n(Circles=18-34, Triangles=35-54, Squares=55+, \nWomen=filled, Men=hollow)") + ylab("Normalized unique click-through rate") + xlab("Normalized reaction rate") + scale_size_manual(values=c(3, 4, 5)) scatter_plot # saving the scatterplot - make sure to change filename as needed ggsave ( "scatter_plot_v4.png" , plot = scatter_plot, scale = 1, width = 6.5, height = 4, units = "in", dpi=320 ) ggsave ( "scatter_plot_v4.eps" , plot = scatter_plot, device="eps", width = 6.5, height = 4, units = "in", dpi=1220 ) #### descriptive analyses #### sum(message_test$impressions) sum(message_test$link_clicks_unique) sum(message_test$link_clicks_total) min(message_test$impressions) max(message_test$impressions) min(message_test$reach) max(message_test$reach) ##### making data frame for table to include in paper ##### # adding a column for reaction rate (RR), defined as reactions divided by reach message_test$RR <- message_test$reactions/message_test$reach # extracting just the columns to use in the table in the paper descriptive_data <- message_test[,c("gender", "age", "frame", "reach", "link_clicks_unique", "link_clicks_unique_CTR", "link_clicks_unique_CPC", "reactions", "RR")] # making the CTR and reaction rates percentages instead of raw proportions descriptive_data$link_clicks_unique_CTR <- descriptive_data$link_clicks_unique_CTR*100 descriptive_data$RR <- descriptive_data$RR*100 # saving the file write.csv(descriptive_data, file="descriptive_data_v2.csv") ##### unique link CPC ##### CPC_data <- message_test[,c("ad_set", "link_clicks_total_CPC", "link_clicks_unique_CPC")] ## plotting CPC for all 24 conditions CPC_all_plot <- ggplot(data=message_test, mapping = aes(fill=frame, x=age)) + geom_col(aes(y=link_clicks_unique_CPC), position="dodge") + facet_wrap(~gender) + theme_mine + ggtitle("CPC for each condition") + ylab("Cost per unique link click ($)") + scale_y_continuous(limits=c(0, 1), expand=c(0,0), breaks=c(0, .2, .4, .6, .8, 1)) + scale_fill_manual(values=frame_colors) CPC_all_plot # saving CPC plot - make sure to change filename as needed ggsave ( "CPC_all_v3.png" , plot = CPC_all_plot, scale = 1, width = 6.5, height = 3, units = "in", dpi=320 ) ggsave ( "CPC_all_v3.eps" , plot = CPC_all_plot, device="eps", width = 6.5, height = 3.25, units = "in", dpi=1220 ) ###### CPC by age ###### ## creating a dataframe for CPCs within each age group (simple average across gender and frame) unique_link_clicks_age <- as.data.frame(matrix(data=NA, nrow=3, ncol=5)) colnames(unique_link_clicks_age) <- c("age", "dollars_spent", "reach", "link_clicks_unique", "link_clicks_unique_CPC") unique_link_clicks_age$age <- c("18-34", "35-54", "55+") for (i in 1:3) { unique_link_clicks_age$dollars_spent[i] <- sum(message_test$dollars_spent[message_test$age==unique_link_clicks_age$age[i]]) unique_link_clicks_age$reach[i] <- sum(message_test$reach[message_test$age==unique_link_clicks_age$age[i]]) unique_link_clicks_age$link_clicks_unique[i] <- sum(message_test$link_clicks_unique[message_test$age==unique_link_clicks_age$age[i]]) } unique_link_clicks_age$link_clicks_unique_CPC <- unique_link_clicks_age$dollars_spent/unique_link_clicks_age$link_clicks_unique ## checking that the dataframe was created properly sum(message_test$reach[message_test$age=="18-34"]) sum(message_test$reach[message_test$age=="35-54"]) sum(message_test$reach[message_test$age=="55+"]) sum(message_test$link_clicks_unique[message_test$age=="18-34"]) sum(message_test$link_clicks_unique[message_test$age=="35-54"]) sum(message_test$link_clicks_unique[message_test$age=="55+"]) ## plotting the CPC as a function of age ggplot(data=unique_link_clicks_age, mapping = aes(x=age)) + geom_col(aes(y=link_clicks_unique_CPC)) + ggtitle("Unique link CPC by age") ###### CPC by gender ###### ## creating a dataframe for CPCs within each gender (simple average across age and frame) unique_link_clicks_gender <- as.data.frame(matrix(data=NA, nrow=2, ncol=5)) colnames(unique_link_clicks_gender) <- c("gender", "dollars_spent", "reach", "link_clicks_unique", "link_clicks_unique_CPC") unique_link_clicks_gender$gender <- c("Male", "Female") for (i in 1:2) { unique_link_clicks_gender$dollars_spent[i] <- sum(message_test$dollars_spent[message_test$gender==unique_link_clicks_gender$gender[i]]) unique_link_clicks_gender$reach[i] <- sum(message_test$reach[message_test$gender==unique_link_clicks_gender$gender[i]]) unique_link_clicks_gender$link_clicks_unique[i] <- sum(message_test$link_clicks_unique[message_test$gender==unique_link_clicks_gender$gender[i]]) } unique_link_clicks_gender$link_clicks_unique_CPC <- unique_link_clicks_gender$dollars_spent/unique_link_clicks_gender$link_clicks_unique ## checking that the dataframe was created properly sum(message_test$reach[message_test$gender=="Male"]) sum(message_test$reach[message_test$gender=="Female"]) sum(message_test$link_clicks_unique[message_test$gender=="Male"]) sum(message_test$link_clicks_unique[message_test$gender=="Female"]) ## plotting the CPC as a function of gender ggplot(data=unique_link_clicks_gender, mapping = aes(x=gender)) + geom_col(aes(y=link_clicks_unique_CPC)) + ggtitle("Unique link CPC by gender") ###### CPC by frame ###### ## creating a dataframe for CPCs within each frame (simple average across age and gender) unique_link_clicks_frame <- as.data.frame(matrix(data=NA, nrow=4, ncol=5)) colnames(unique_link_clicks_frame) <- c("frame", "dollars_spent", "reach", "link_clicks_unique", "link_clicks_unique_CPC") unique_link_clicks_frame$frame <- c("Community", "Food", "Nature", "Physicality") for (i in 1:4) { unique_link_clicks_frame$dollars_spent[i] <- sum(message_test$dollars_spent[message_test$frame==unique_link_clicks_frame$frame[i]]) unique_link_clicks_frame$reach[i] <- sum(message_test$reach[message_test$frame==unique_link_clicks_frame$frame[i]]) unique_link_clicks_frame$link_clicks_unique[i] <- sum(message_test$link_clicks_unique[message_test$frame==unique_link_clicks_frame$frame[i]]) } unique_link_clicks_frame$link_clicks_unique_CPC <- unique_link_clicks_frame$dollars_spent/unique_link_clicks_frame$link_clicks_unique ## checking that the dataframe was created properly sum(message_test$reach[message_test$frame=="Community"]) sum(message_test$reach[message_test$frame=="Food"]) sum(message_test$reach[message_test$frame=="Nature"]) sum(message_test$reach[message_test$frame=="Physicality"]) sum(message_test$link_clicks_unique[message_test$frame=="Community"]) sum(message_test$link_clicks_unique[message_test$frame=="Food"]) sum(message_test$link_clicks_unique[message_test$frame=="Nature"]) sum(message_test$link_clicks_unique[message_test$frame=="Physicality"]) ## plotting the CPC as a function of frame ggplot(data=unique_link_clicks_frame, mapping = aes(x=frame)) + geom_col(aes(y=link_clicks_unique_CPC)) + ggtitle("Unique link CPC by frame") #### Analyzing non-stratified follow-up test #### ##### loading and cleaning data ##### unstratified_full <- read.csv("Message-test-unstratified.csv") ## removing unnecessary columns unstratified_full <- unstratified_full %>% select(-one_of('Reporting.starts', 'Reporting.ends', 'Cost.per.follow.or.like')) ## renaming columns to be easier to use colnames(unstratified_full) <- c("age", "gender", "dollars_spent", "impressions", "impressions_CPM", "reach", "reach_CPM", "frequency", "link_clicks_unique", "link_clicks_unique_CTR", "link_clicks_unique_CPC", "all_clicks_unique", "all_clicks_unique_CTR", "all_clicks_unique_CPC", "link_clicks_total", "link_clicks_total_CTR", "link_clicks_total_CPC", "all_clicks_total", "all_clicks_total_CTR", "all_clicks_total_CPC", "engagement_page", "engagement_post", "reactions", "comments", "shares", "saves", "follows_likes", "engagement_page_CP1", "engagement_post_CP1") ## replacing the percentage CTRs with raw proportion CTRs (max value 1) unstratified_full$link_clicks_unique_CTR <- unstratified_full$link_clicks_unique_CTR/100 unstratified_full$all_clicks_unique_CTR <- unstratified_full$all_clicks_unique_CTR/100 unstratified_full$link_clicks_total_CTR <- unstratified_full$link_clicks_total_CTR/100 unstratified_full$all_clicks_total_CTR <- unstratified_full$all_clicks_total_CTR/100 ## replacing NA values for comments, shares, saves, and follows/likes with 0 unstratified_full <- unstratified_full %>% replace(is.na(.), 0) ## making age and gender categorical variables unstratified_full$age <- as.factor(unstratified_full$age) unstratified_full$gender <- as.factor(unstratified_full$gender) ## making integer variables integer variables unstratified_full$impressions <- as.integer(unstratified_full$impressions) unstratified_full$reach <- as.integer(unstratified_full$reach) unstratified_full$link_clicks_unique <- as.integer(unstratified_full$link_clicks_unique) unstratified_full$link_clicks_total <- as.integer(unstratified_full$link_clicks_total) unstratified_full$all_clicks_unique <- as.integer(unstratified_full$all_clicks_unique) unstratified_full$all_clicks_total <- as.integer(unstratified_full$all_clicks_total) unstratified_full$engagement_page <- as.integer(unstratified_full$engagement_page) unstratified_full$engagement_post <- as.integer(unstratified_full$engagement_post) unstratified_full$reactions <- as.integer(unstratified_full$reactions) unstratified_full$comments <- as.integer(unstratified_full$comments) unstratified_full$shares <- as.integer(unstratified_full$shares) unstratified_full$saves <- as.integer(unstratified_full$saves) unstratified_full$follows_likes <- as.integer(unstratified_full$follows_likes) ## double-checking that all variables have the right class lapply(unstratified_full, class) summary(unstratified_full) ### each of our desired 3 age groups consists of 2 of the age groups present in this data, ### so the following code creates a new dataframe for combining the pairs of age groups unstratified <- as.data.frame(matrix(data=NA, nrow=9, ncol=8)) colnames(unstratified) <- c("age", "gender", "impressions", "reach", "prop_impressions_all", "prop_reach_all", "prop_impressions_fm", "prop_reach_fm") unstratified$age <- c(rep("18-34", 3), rep("35-54", 3), rep("55+",3)) unstratified$gender <- rep(c("Female", "Male", "Unknown"),3) ## making age and gender categorical variables unstratified$age <- as.factor(unstratified$age) unstratified$gender <- as.factor(unstratified$gender) ## adding together the impressions for each pair of age groups ## and adding together the reach for each pair of age groups for (x in levels(unstratified$gender)) { unstratified$impressions[unstratified$gender==x & unstratified$age=="18-34"] <- unstratified_full$impressions[unstratified_full$gender==x & unstratified_full$age=="18-24"] + unstratified_full$impressions[unstratified_full$gender==x & unstratified_full$age=="25-34"] unstratified$impressions[unstratified$gender==x & unstratified$age=="35-54"] <- unstratified_full$impressions[unstratified_full$gender==x & unstratified_full$age=="35-44"] + unstratified_full$impressions[unstratified_full$gender==x & unstratified_full$age=="45-54"] unstratified$impressions[unstratified$gender==x & unstratified$age=="55+"] <- unstratified_full$impressions[unstratified_full$gender==x & unstratified_full$age=="55-64"] + unstratified_full$impressions[unstratified_full$gender==x & unstratified_full$age=="65+"] unstratified$reach[unstratified$gender==x & unstratified$age=="18-34"] <- unstratified_full$reach[unstratified_full$gender==x & unstratified_full$age=="18-24"] + unstratified_full$reach[unstratified_full$gender==x & unstratified_full$age=="25-34"] unstratified$reach[unstratified$gender==x & unstratified$age=="35-54"] <- unstratified_full$reach[unstratified_full$gender==x & unstratified_full$age=="35-44"] + unstratified_full$reach[unstratified_full$gender==x & unstratified_full$age=="45-54"] unstratified$reach[unstratified$gender==x & unstratified$age=="55+"] <- unstratified_full$reach[unstratified_full$gender==x & unstratified_full$age=="55-64"] + unstratified_full$reach[unstratified_full$gender==x & unstratified_full$age=="65+"] } ## filling in the columns for proportions, including Unknown gender unstratified$prop_impressions_all <- unstratified$impressions/sum(unstratified$impressions) unstratified$prop_reach_all <- unstratified$reach/sum(unstratified$reach) ## filling the columns for proportions, excluding Unknown gender impressions_fm <- sum(unstratified$impressions[unstratified$gender!="Unknown"]) reach_fm <- sum(unstratified$reach[unstratified$gender!="Unknown"]) unstratified$prop_impressions_fm <- unstratified$impressions/impressions_fm unstratified$prop_reach_fm <- unstratified$reach/reach_fm unstratified$prop_impressions_fm[unstratified$gender=="Unknown"] <- NA unstratified$prop_reach_fm[unstratified$gender=="Unknown"] <- NA ## checking that this worked sum(unstratified$reach) sum(unstratified_full$reach) sum(unstratified$prop_reach_fm, na.rm = TRUE) ## making an unstratified dataset omitting Unknown gender unstratified_fm <- unstratified[unstratified$gender!="Unknown",] ## removing "Unknown" from the list of levels of "gender" unstratified_fm$gender <- as.factor(as.character((unstratified_fm$gender))) levels(unstratified_fm$gender) ##### extracting population proportions (including unknown gender) for the table in the paper ##### sum(unstratified$prop_reach_all[unstratified$gender=="Female"]) sum(unstratified$prop_reach_all[unstratified$gender=="Male"]) sum(unstratified$prop_reach_all[unstratified$gender=="Unknown"]) sum(unstratified$prop_reach_all[unstratified$age=="18-34"]) sum(unstratified$prop_reach_all[unstratified$age=="35-54"]) sum(unstratified$prop_reach_all[unstratified$age=="55+"]) ##### calculating the unstratified unique link CTR for each frame ##### # community CTR_unstratified_community <- 0.0 levels(unstratified_fm$age) levels(unstratified_fm$gender) for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CTR_unstratified_community <- CTR_unstratified_community + message_test$link_clicks_unique_CTR[message_test$age==a & message_test$gender==g & message_test$frame=="Community"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CTR_unstratified_community # food CTR_unstratified_culinary <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CTR_unstratified_culinary <- CTR_unstratified_culinary + message_test$link_clicks_unique_CTR[message_test$age==a & message_test$gender==g & message_test$frame=="Food"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CTR_unstratified_culinary # nature CTR_unstratified_nature <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CTR_unstratified_nature <- CTR_unstratified_nature + message_test$link_clicks_unique_CTR[message_test$age==a & message_test$gender==g & message_test$frame=="Nature"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CTR_unstratified_nature # physicality CTR_unstratified_physicality <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CTR_unstratified_physicality <- CTR_unstratified_physicality + message_test$link_clicks_unique_CTR[message_test$age==a & message_test$gender==g & message_test$frame=="Physicality"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CTR_unstratified_physicality ##### calculating the unstratified CPC for each frame ##### # community CPC_unstratified_community <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CPC_unstratified_community <- CPC_unstratified_community + message_test$link_clicks_unique_CPC[message_test$age==a & message_test$gender==g & message_test$frame=="Community"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CPC_unstratified_community # food CPC_unstratified_culinary <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CPC_unstratified_culinary <- CPC_unstratified_culinary + message_test$link_clicks_unique_CPC[message_test$age==a & message_test$gender==g & message_test$frame=="Food"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CPC_unstratified_culinary # nature CPC_unstratified_nature <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CPC_unstratified_nature <- CPC_unstratified_nature + message_test$link_clicks_unique_CPC[message_test$age==a & message_test$gender==g & message_test$frame=="Nature"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CPC_unstratified_nature # physicality CPC_unstratified_physicality <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { CPC_unstratified_physicality <- CPC_unstratified_physicality + message_test$link_clicks_unique_CPC[message_test$age==a & message_test$gender==g & message_test$frame=="Physicality"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } CPC_unstratified_physicality ##### calculating the unstratified unique RR for each frame ##### ## adding a column in the message test data for reaction rate (RR) (this has already been done if all the code up to this point has been run) message_test$RR <- message_test$reactions/message_test$reach # community RR_unstratified_community <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { RR_unstratified_community <- RR_unstratified_community + message_test$RR[message_test$age==a & message_test$gender==g & message_test$frame=="Community"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } RR_unstratified_community # food RR_unstratified_culinary <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { RR_unstratified_culinary <- RR_unstratified_culinary + message_test$RR[message_test$age==a & message_test$gender==g & message_test$frame=="Food"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } RR_unstratified_culinary # nature RR_unstratified_nature <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { RR_unstratified_nature <- RR_unstratified_nature + message_test$RR[message_test$age==a & message_test$gender==g & message_test$frame=="Nature"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } RR_unstratified_nature # physicality RR_unstratified_physicality <- 0.0 for (a in levels(unstratified_fm$age)) { for (g in levels(unstratified_fm$gender)) { RR_unstratified_physicality <- RR_unstratified_physicality + message_test$RR[message_test$age==a & message_test$gender==g & message_test$frame=="Physicality"]*unstratified_fm$prop_reach_fm[unstratified_fm$age==a & unstratified_fm$gender==g] } } RR_unstratified_physicality # Session information # sessionInfo() # # R version 4.3.2 (2023-10-31) # Platform: aarch64-apple-darwin20 (64-bit) # Running under: macOS Sonoma 14.3.1 # # Matrix products: default # BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib # LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0 # # locale: # [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 # # time zone: America/Chicago # tzcode source: internal # # attached base packages: # [1] stats graphics grDevices utils datasets methods base # # other attached packages: # [1] ggpattern_1.0.1 gridpattern_1.1.1 dplyr_1.1.4 boot_1.3-28.1 emmeans_1.9.0 faraway_1.0.8 # [7] magrittr_2.0.3 ggplot2_3.4.4 # # loaded via a namespace (and not attached): # [1] utf8_1.2.4 generics_0.1.3 class_7.3-22 KernSmooth_2.23-22 lattice_0.22-5 lme4_1.1-35.1 # [7] grid_4.3.2 estimability_1.4.1 mvtnorm_1.2-4 fastmap_1.1.1 Matrix_1.6-4 e1071_1.7-14 # [13] DBI_1.2.0 fansi_1.0.6 scales_1.3.0 textshaping_0.3.7 cli_3.6.2 rlang_1.1.2 # [19] units_0.8-5 munsell_0.5.0 splines_4.3.2 withr_2.5.2 cachem_1.0.8 tools_4.3.2 # [25] memoise_2.0.1 nloptr_2.0.3 coda_0.19-4 minqa_1.2.6 colorspace_2.1-0 vctrs_0.6.5 # [31] R6_2.5.1 proxy_0.4-27 lifecycle_1.0.4 classInt_0.4-10 MASS_7.3-60 ragg_1.2.7 # [37] pkgconfig_2.0.3 pillar_1.9.0 gtable_0.3.4 glue_1.6.2 Rcpp_1.0.11 systemfonts_1.0.5 # [43] sf_1.0-15 tibble_3.2.1 tidyselect_1.2.0 rstudioapi_0.15.0 farver_2.1.1 xtable_1.8-4 # [49] nlme_3.1-164 labeling_0.4.3 compiler_4.3.2 # Time the code toc <- Sys.time() # Print how long it took to run the code print(paste0("The code began running at ", tic, " and stopped running at ", toc))