library(tidyverse) # read_csv, dplyr/stringr/tidyr/ggplot2 verbs used throughout
library(tidybayes) # geom_pointinterval (loaded again below before the Stan analysis)
df <- read_csv("../data/nootroflix_ratings_users_clean.csv")
In this part, I present the methodology I’ll use throughout this post.
First, some changes to Nootroflix that we have to take into account:
In the first version of Nootroflix (for which there are the most ratings), the default answer for “issues with this nootropic” was “None / Unsure”, and you could only select one issue.
In subsequent versions, multiple selections were possible, and the default answer was simply no selection (though you could still select “None/Unsure”).
For the first version, we restrict our analysis to users with enough ratings (>15) and at least one nootropic for which they entered an issue (because a lot of people weren’t entering issues). This probably yields an underestimate, because some people might have entered one issue but not all the issues they had.
For the second version, we can compute lower and upper bounds for the probability of an issue, as well as an unbiased (but very noisy) estimate (a toy sketch after this list illustrates the three estimators):
lower bound: number of people who reported this issue for this nootropic / number of people who tried this nootropic. To tighten the bound, we restrict our analysis to people who rated enough nootropics, and reported at least one issue.
upper bound: number of people who reported this issue for this nootropic / number of people who answered the issue question for this nootropic (no empty selection). This is probably an upper bound because you’re more likely to answer the “issues” question if you had an issue. To tighten the bound, we can restrict ourselves to users who have entered “None/Unsure” as an issue for at least one other nootropic.
unbiased estimate: Some users (169, with 1830 ratings) were kind enough to answer the issue question for every rating they made. If we restrict ourselves to these users, we should get an unbiased estimate, assuming these users aren’t too unusual.
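To make the three estimators concrete, here is a minimal sketch on made-up counts for a single hypothetical nootropic (the numbers are purely illustrative, not taken from the data):
# Made-up counts for one hypothetical nootropic (second-version data), purely illustrative
n_tried    <- 200 # users who tried it (after the filters above)
n_answered <- 80  # users who answered the issue question for it (non-empty selection)
n_reported <- 10  # users who reported the issue of interest
n_complete_responders          <- 30 # users who answered the issue question for every rating they made
n_complete_responders_reported <- 3  # among those, users who reported the issue
n_reported / n_tried                                   # lower bound: 0.05
n_reported / n_answered                                # upper bound: 0.125
n_complete_responders_reported / n_complete_responders # unbiased (noisy) estimate: 0.1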
Let’s see what this gives us for estimating the probability of an issue, using side effects as an example:
df_new <- df %>%
filter(str_detect(issue, "\\[")) %>%
mutate(issue = str_remove(issue, "\\["),
issue = str_remove(issue, "\\]"),
issue = str_remove_all(issue, "\\'"),
issue = str_split(issue, ",")) %>%
unnest(issue) %>%
mutate(issue = case_when(
issue == "" ~ "unknown",
str_detect(issue, "None") ~ "none",
str_detect(issue, "Developed addiction") ~ "addiction",
str_detect(issue, "Developed tolerance") ~ "tolerance",
str_detect(issue, "Other issues") ~ "other",
str_detect(issue, "Had to stop because of side effects") ~ "side_effects",
str_detect(issue, "Persistent side effects") ~ "long_term_side_effects"))
df_old <- df %>%
filter(!str_detect(issue, "\\[")) %>%
mutate(issue = case_when(
issue == "" ~ "unknown",
str_detect(issue, "None") ~ "none",
str_detect(issue, "Developed addiction") ~ "addiction",
str_detect(issue, "Developed tolerance") ~ "tolerance",
str_detect(issue, "Other issues") ~ "other",
str_detect(issue, "Had to stop because of side effects") ~ "side_effects",
str_detect(issue, "Persistent side effects") ~ "long_term_side_effects"))
Using data from Nootroflix’s first version:
users_filling_issues <- (df_old %>%
group_by(userID) %>%
filter(n() > 15) %>% #enough ratings
ungroup() %>%
filter(issue != "none") %>% #at least one not None
select(userID) %>%
distinct)$userID
estimate_old <- df_old %>%
filter(userID %in% users_filling_issues) %>%
group_by(itemID, issue) %>%
summarise(count = n()) %>%
mutate(count_total = sum(count)) %>%
mutate(variant = "firt version")
Using data from Nootroflix’s second version:
estimate_new_lower <- df_new %>%
filter(userID %in% (df_new %>%
group_by(userID) %>%
filter(n() > 15) %>% #enough ratings
ungroup() %>%
filter(issue != "unknown") %>% #at least one issue entered
select(userID) %>%
distinct)$userID) %>%
group_by(itemID, issue) %>% #assume only one rating set per user
summarise(count = n()) %>%
mutate(count_total = sum(count)) %>%
mutate(variant = "second version, lower")
estimate_new_upper <- df_new %>%
filter(userID %in% (df_new %>%
group_by(userID) %>%
filter(n() > 15) %>% #enough ratings
ungroup() %>%
filter(issue == "none") %>% #at least one time has entered none
select(userID) %>%
distinct)$userID) %>%
group_by(itemID, issue) %>% #assume only one rating set per user
summarise(count = n()) %>%
mutate(count_total = sum(count)) %>%
pivot_wider(names_from = issue, values_from = count) %>%
mutate_all(~replace_na(., 0)) %>%
mutate(count_total = count_total - unknown) %>%
pivot_longer(cols = c("none", "other", "side_effects", "addiction", "long_term_side_effects", "tolerance"), values_to= "count", names_to = "issue") %>%
mutate(variant = "second version, upper")
estimate_new_unbiased <- df_new %>%
group_by(userID) %>%
mutate(n_issues = sum(issue != "unknown")) %>%
filter(n_issues == n()) %>% # we restrict ourselves to users who answered the issue question for every rating
ungroup() %>%
group_by(itemID, issue) %>% #assume only one rating set per user
summarise(count = n()) %>%
mutate(count_total = sum(count)) %>%
mutate(variant = "second version, unbiased")
The different estimates seem to give consistent results:
estimate_new_upper %>%
bind_rows(estimate_new_lower) %>%
bind_rows(estimate_new_unbiased) %>%
bind_rows(estimate_old) %>%
mutate(nootropic = itemID) %>%
mutate_all(~replace_na(., 0)) %>%
filter(count_total > 0) %>%
rowwise() %>%
mutate(prop = prop.test(count, count_total, conf.level=0.95)$estimate,
prop_low = prop.test(count, count_total, conf.level=0.95)$conf.int[[1]],
prop_high = prop.test(count, count_total, conf.level=0.95)$conf.int[[2]]) %>%
ungroup() %>%
filter(issue == "side_effects") %>%
mutate(nootropic = str_sub(nootropic, 1, 25)) %>%
mutate(nootropic = fct_reorder(nootropic, prop)) %>%
filter(prop_high - prop_low < 0.6) %>%
group_by(nootropic) %>%
filter(n() == 4) %>%
ungroup() %>%
#filter(nootropic %in% sample(levels(nootropic), 5)) %>%
#group_by(nootropic) %>%
#mutate(prop_mean = mean(prop)) %>%
#ungroup() %>%
#filter(rank(-prop) < 15) %>%
ggplot() +
geom_pointinterval(aes(x = prop, xmin=prop_low, xmax=prop_high, y=nootropic, color = variant), position=position_dodge2()) +
scale_x_log10() +
xlab("Probability of side effects") +
ylab("")
To get a single estimate for each nootropic, we’re going to build a probabilistic model that takes non-response bias into account.
We only observe reports, not actual issues, so we want to model both \(P(report | issue, issue\_type, item, user, ui)\) and \(P(issue | issue\_type, item, user, ui)\), with \(ui\) being the Nootroflix version.
Some simplifying assumptions:
\(P(report | issue, issue\_type, item, user, ui) = P(report | issue, user, ui)\), i.e. the report probability depends neither on the nootropic nor on the type of issue (I’d like to investigate the latter when I have more time).
\(P(issue | issue\_type, item, user, ui) = P(issue | issue\_type, item)\), i.e. the probability of actually having an issue doesn’t depend on the user or on the Nootroflix version.
We also decompose reports into 3 categories (a small worked example follows this list):
“none” (0) if the user has entered “None/Unsure” (only for the second Nootroflix version, as this was the default answer in the first version), or if the user has checked another issue for the same nootropic (for both Nootroflix versions)
“empty” (1) if the user hasn’t responded (second Nootroflix version) or if the user has left the default “None” (first Nootroflix version). For the first version, we suppose that if a user selected another issue, they didn’t have any other issue with the same nootropic (even though only one choice was possible).
“issue” (2) if the user has checked the issue
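To make this coding concrete, here is a minimal sketch for a single made-up user from the second version (the nootropics and answers are invented); the actual pipeline below does the equivalent on the real data with complete():
# Hypothetical answers of one user (second Nootroflix version); character(0) = question left empty
answers <- list(
  Caffeine    = c("tolerance"),   # checked one issue
  Ashwagandha = c("none"),        # explicitly checked "None / Unsure"
  Creatine    = character(0)      # left the issue question empty
)
issue_types <- c("addiction", "tolerance", "side_effects",
                 "long_term_side_effects", "other")
report_code <- function(checked, issue_type) {
  if (issue_type %in% checked) 2   # "issue": this box was checked
  else if (length(checked) > 0) 0  # "none": something else (possibly "none") was checked
  else 1                           # "empty": no answer at all
}
# Report code for every (nootropic, issue type) pair of this user:
expand_grid(itemID = names(answers), issue = issue_types) %>%
  rowwise() %>%
  mutate(report = report_code(answers[[itemID]], issue)) %>%
  ungroup()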
We can now write the model:
\(P(issue | issue\_type, item) = \mathcal{B}(p_{issue\_type, item})\), where \(\mathcal{B}\) is a Bernoulli distribution. \(p_{issue\_type, item}\) is what we want to estimate.
\(P(report = "none" | issue, user, ui) = \mathbb{1}_{issue=False} inv\_logit(\beta^{none}_{ui} + \beta^{none}_{user}))\)
\(P(report = "empty" | issue, user, ui) = \mathbb{1}_{issue=False} inv\_logit(\beta^{empty}_{user} + \beta^{empty}_{ui})) + \mathbb{1}_{issue=False} (1 - P(report = "none" | issue, user, ui))\)
$P(report = “issue” | issue, user, ui) = _{issue=true} (1 - P(report = “empty” | issue, user, ui)) $
Other modeling details: we use a hierarchical model for \(p_{issue\_type, item}\), which lets us get estimates even for nootropics with few ratings, and we put Beta priors on the per-user response probabilities, to put more probability mass on users who answer either most issue questions or none of them.
You can check out the Stan code on the GitHub repo.
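To make the observation model explicit, here is a minimal R sketch of the per-observation report probabilities implied by the equations above, marginalizing over the latent issue indicator (the parameter values are made up, and the hierarchical structure and priors are left out):
inv_logit <- function(x) 1 / (1 + exp(-x))
# Probability of each report category for one (user, item, issue type, ui) combination,
# marginalizing over whether the user actually had the issue
report_probs <- function(p_issue, beta_none_user, beta_none_ui,
                         beta_empty_user, beta_empty_ui) {
  q_none  <- inv_logit(beta_none_user + beta_none_ui)   # P(report "none"  | no issue)
  q_empty <- inv_logit(beta_empty_user + beta_empty_ui) # P(report "empty" | issue)
  c(none  = (1 - p_issue) * q_none,
    empty = (1 - p_issue) * (1 - q_none) + p_issue * q_empty,
    issue = p_issue * (1 - q_empty))
}
# Made-up values: a fairly rare issue and a user who usually answers the question
report_probs(p_issue = 0.05,
             beta_none_user = 1, beta_none_ui = 0.5,
             beta_empty_user = -1, beta_empty_ui = 0) # the three probabilities sum to 1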
df <- read_csv("../data/nootroflix_ratings_users_clean.csv") %>%
select(itemID, userID, issue)
df_new <- df %>%
filter(str_detect(issue, "\\[")) %>%
mutate(issue = str_remove(issue, "\\["),
issue = str_remove(issue, "\\]"),
issue = str_remove_all(issue, "\\'"),
issue = str_split(issue, ",")) %>%
unnest(issue) %>%
mutate(issue = case_when(
issue == "" ~ "empty",
str_detect(issue, "None") ~ "none",
str_detect(issue, "Developed addiction") ~ "addiction",
str_detect(issue, "Developed tolerance") ~ "tolerance",
str_detect(issue, "Other issues") ~ "other",
str_detect(issue, "Had to stop because of side effects") ~ "side_effects",
str_detect(issue, "Persistent side effects") ~ "long_term_side_effects"))
df_old <- df %>%
filter(!str_detect(issue, "\\[")) %>%
mutate(issue = case_when(
issue == "" ~ "empty",
str_detect(issue, "None") ~ "empty", #careful
str_detect(issue, "Developed addiction") ~ "addiction",
str_detect(issue, "Developed tolerance") ~ "tolerance",
str_detect(issue, "Other issues") ~ "other",
str_detect(issue, "Had to stop because of side effects") ~ "side_effects",
str_detect(issue, "Persistent side effects") ~ "long_term_side_effects"))
df_for_stan <- df_old %>%
mutate(ui = 1) %>%
bind_rows(df_new %>% mutate(ui = 2)) %>%
mutate(report= case_when(
issue == "none" ~ 0,
issue == "empty" ~ 1,
TRUE ~ 2))
df_for_stan <-
df_for_stan %>%
complete(nesting(itemID, userID, ui), issue, fill=list(report=1)) %>% # get an issue report for every possible issue for each item tried by each user
#complete(nesting(itemID, userID), issue, fill=list(report=1)) %>% # get a issue report for all possible issues for items tried by each user
group_by(userID, itemID) %>%
mutate(has_checked_something = if_else(min(report) == 0 | max(report) == 2, T, F)) %>%
mutate(report = if_else(report == 1 & has_checked_something, 0, report)) %>%
filter(issue != "empty", issue != "none") %>%
select(-has_checked_something)
df_for_stan <- df_for_stan %>%
mutate(userID = as.character(userID))
options(mc.cores = parallel::detectCores())
library(cmdstanr)
library(tidybayes)
data <- compose_data(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused"))
#mod <- cmdstan_model('analysis/issue_analysis.stan')
#mod$check_syntax()
#fit <- mod$sample(
# data = data,
# chains=4,
# iter_warmup = 1000,
# iter_sampling = 1000)
fit <- readRDS("saved_models/model_stan_final")
The Stan fit is satisfactory, and as you’ll see below, the results match nicely with the estimates computed above.
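As a rough check behind that claim, here is a minimal sketch of the convergence diagnostics one can run with cmdstanr’s built-in summaries (this assumes the saved fit object still carries its draws, e.g. because it was stored with fit$save_object()):
fit$diagnostic_summary() # divergences, max-treedepth hits, E-BFMI
fit$summary("item_issues_proba_true_issue") %>% # rhat and effective sample sizes per parameter
  summarise(max_rhat = max(rhat, na.rm = TRUE),
            min_ess_bulk = min(ess_bulk, na.rm = TRUE))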
estimates <- estimate_new_upper %>%
bind_rows(estimate_new_lower) %>%
bind_rows(estimate_new_unbiased) %>%
bind_rows(estimate_old) %>%
mutate(nootropic = itemID) %>%
mutate_all(~replace_na(., 0)) %>%
filter(count_total > 0) %>%
rowwise() %>%
mutate(prop = prop.test(count, count_total, conf.level=0.95)$estimate,
prop_low = prop.test(count, count_total, conf.level=0.95)$conf.int[[1]],
prop_high = prop.test(count, count_total, conf.level=0.95)$conf.int[[2]]) %>%
ungroup()
estimates_addiction <- estimates %>%
filter(issue == "addiction") %>%
mutate(nootropic = fct_reorder(nootropic, prop)) %>%
group_by(nootropic) %>%
mutate(prop_mean = mean(prop)) %>%
ungroup()
df_fit_addiction <- fit %>%
recover_types(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused")) %>%
spread_draws(item_issues_proba_true_issue[items, issues]) %>%
filter(issues == "addiction") %>%
mutate(items = as_factor(items)) %>%
mutate(items = fct_reorder(items, item_issues_proba_true_issue)) %>%
mutate(itemID=items, issue = issues) %>%
group_by(itemID, issue) %>%
summarise(prop = median(item_issues_proba_true_issue),
prop_low = quantile(item_issues_proba_true_issue, 0.05),
prop_high = quantile(item_issues_proba_true_issue, 0.95)) %>%
ungroup() %>%
mutate(variant="stan model")
#To show that the model predictions match nicely with the estimates computed above
estimates_addiction %>%
bind_rows(df_fit_addiction) %>%
mutate(itemID = str_sub(itemID, 1, 25)) %>%
group_by(variant) %>%
filter(rank(-prop) < 15) %>%
#group_by(itemID) %>%
#filter(max(prop_high - prop_low) < 0.3) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID, color=variant), position=position_dodge2()) +
scale_x_log10() +
xlab("Log probability of addiction")+
ylab("")
df_fit_addiction %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of addiction") +
ylab("")
ggsave("plots/issues_addiction.jpeg", width=10, height=10, units = "in", limitsize = F, dpi=300)
#All nootropics
plot_addiction_full <- df_fit_addiction %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
#filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of addiction") +
ylab("")
ggsave("plots/issues_addiction_full.jpeg", width=10, height=50, units = "in", limitsize = F, dpi=300, plot=plot_addiction_full)
estimates_tolerance <- estimates %>%
filter(issue == "tolerance") %>%
mutate(nootropic = fct_reorder(nootropic, prop)) %>%
group_by(nootropic) %>%
mutate(prop_mean = mean(prop)) %>%
ungroup()
df_fit_tolerance <- fit %>%
recover_types(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused")) %>%
spread_draws(item_issues_proba_true_issue[items, issues]) %>%
filter(issues == "tolerance") %>%
mutate(items = as_factor(items)) %>%
mutate(items = fct_reorder(items, item_issues_proba_true_issue)) %>%
mutate(itemID=items, issue = issues) %>%
group_by(itemID, issue) %>%
summarise(prop = median(item_issues_proba_true_issue),
prop_low = quantile(item_issues_proba_true_issue, 0.05),
prop_high = quantile(item_issues_proba_true_issue, 0.95)) %>%
ungroup() %>%
mutate(variant="stan model")
#To show that the model predictions match nicely with the estimates computed above
estimates_tolerance %>%
bind_rows(df_fit_tolerance) %>%
mutate(itemID = str_sub(itemID, 1, 25)) %>%
group_by(variant) %>%
filter(rank(-prop) < 15) %>%
#group_by(itemID) %>%
#filter(max(prop_high - prop_low) < 0.3) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID, color=variant), position=position_dodge2()) +
scale_x_log10() +
xlab("Log probability of tolerance")+
ylab("")
df_fit_tolerance %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of tolerance") +
ylab("")
ggsave("plots/issues_tolerance.jpeg", width=10, height=10, units = "in", limitsize = F, dpi=300)
#All nootropics
plot_tolerance_full <- df_fit_tolerance %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
#filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of tolerance") +
ylab("")
ggsave("plots/issues_tolerance_full.jpeg", width=10, height=50, units = "in", limitsize = F, dpi=300, plot=plot_tolerance_full)
estimates_side_effects <- estimates %>%
filter(issue == "side_effects") %>%
mutate(nootropic = fct_reorder(nootropic, prop)) %>%
group_by(nootropic) %>%
mutate(prop_mean = mean(prop)) %>%
ungroup()
df_fit_side_effects <- fit %>%
recover_types(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused")) %>%
spread_draws(item_issues_proba_true_issue[items, issues]) %>%
filter(issues == "side_effects") %>%
mutate(items = as_factor(items)) %>%
mutate(items = fct_reorder(items, item_issues_proba_true_issue)) %>%
mutate(itemID=items, issue = issues) %>%
group_by(itemID, issue) %>%
summarise(prop = median(item_issues_proba_true_issue),
prop_low = quantile(item_issues_proba_true_issue, 0.05),
prop_high = quantile(item_issues_proba_true_issue, 0.95)) %>%
ungroup() %>%
mutate(variant="stan model")
#To show that the model predictions match nicely with the estimates computed above
estimates_side_effects %>%
bind_rows(df_fit_side_effects) %>%
mutate(itemID = str_sub(itemID, 1, 25)) %>%
group_by(variant) %>%
filter(rank(-prop) < 15) %>%
#group_by(itemID) %>%
#filter(max(prop_high - prop_low) < 0.3) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID, color=variant), position=position_dodge2()) +
scale_x_log10()+
xlab("Log probability of side_effects") +
ylab("")
df_fit_side_effects %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of side_effects") +
ylab("")
ggsave("plots/issues_side_effects.jpeg", width=10, height=10, units = "in", limitsize = F, dpi=300)
#All nootropics
plot_side_effects_full <- df_fit_side_effects %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
#filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of side_effects") +
ylab("")
ggsave("plots/issues_side_effects_full.jpeg", width=10, height=50, units = "in", limitsize = F, dpi=300, plot=plot_side_effects_full)
estimates_long_term_side_effects <- estimates %>%
filter(issue == "long_term_side_effects") %>%
mutate(nootropic = fct_reorder(nootropic, prop)) %>%
group_by(nootropic) %>%
mutate(prop_mean = mean(prop)) %>%
ungroup()
df_fit_long_term_side_effects <- fit %>%
recover_types(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused")) %>%
spread_draws(item_issues_proba_true_issue[items, issues]) %>%
filter(issues == "long_term_side_effects") %>%
mutate(items = as_factor(items)) %>%
mutate(items = fct_reorder(items, item_issues_proba_true_issue)) %>%
mutate(itemID=items, issue = issues) %>%
group_by(itemID, issue) %>%
summarise(prop = median(item_issues_proba_true_issue),
prop_low = quantile(item_issues_proba_true_issue, 0.05),
prop_high = quantile(item_issues_proba_true_issue, 0.95)) %>%
ungroup() %>%
mutate(variant="stan model")
#To show that the model predictions match nicely with the estimates computed above
estimates_long_term_side_effects %>%
bind_rows(df_fit_long_term_side_effects) %>%
mutate(itemID = str_sub(itemID, 1, 25)) %>%
group_by(variant) %>%
filter(rank(-prop) < 15) %>%
#group_by(itemID) %>%
#filter(max(prop_high - prop_low) < 0.3) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID, color=variant), position=position_dodge2()) +
scale_x_log10()+
xlab("Log probability of long-term side_effects") +
ylab("")
df_fit_long_term_side_effects %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of long-term side_effects") +
ylab("")
ggsave("plots/issues_long_term_side_effects.jpeg", width=10, height=10, units = "in", limitsize = F, dpi=300)
#All nootropics
plot_long_term_side_effects_full <- df_fit_long_term_side_effects %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
#filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of long-term side_effects") +
ylab("")
ggsave("plots/issues_long_term_side_effects_full.jpeg", width=10, height=50, units = "in", limitsize = F, dpi=300, plot=plot_long_term_side_effects_full)
estimates_other <- estimates %>%
filter(issue == "other") %>%
mutate(nootropic = fct_reorder(nootropic, prop)) %>%
group_by(nootropic) %>%
mutate(prop_mean = mean(prop)) %>%
ungroup()
df_fit_other <- fit %>%
recover_types(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused")) %>%
spread_draws(item_issues_proba_true_issue[items, issues]) %>%
filter(issues == "other") %>%
mutate(items = as_factor(items)) %>%
mutate(items = fct_reorder(items, item_issues_proba_true_issue)) %>%
mutate(itemID=items, issue = issues) %>%
group_by(itemID, issue) %>%
summarise(prop = median(item_issues_proba_true_issue),
prop_low = quantile(item_issues_proba_true_issue, 0.05),
prop_high = quantile(item_issues_proba_true_issue, 0.95)) %>%
ungroup() %>%
mutate(variant="stan model")
#To show that the model predictions match nicely with the estimates computed above
estimates_other %>%
bind_rows(df_fit_other) %>%
mutate(itemID = str_sub(itemID, 1, 25)) %>%
group_by(variant) %>%
filter(rank(-prop) < 15) %>%
#filter(min(prop_high - prop_low) < 0.02) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID, color=variant), position=position_dodge2()) +
scale_x_log10() +
xlab("Log probability of other issues")+
ylab("")
df_fit_other %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of other issues") +
ylab("")
ggsave("plots/issues_other.jpeg", width=10, height=10, units = "in", limitsize = F, dpi=300)
#All nootropics
plot_others_full <- df_fit_other %>%
mutate(itemID = as_factor(itemID)) %>%
mutate(itemID = fct_reorder(itemID, prop)) %>%
#filter(rank(-prop) < 20) %>%
ggplot() +
geom_pointinterval(aes(x=prop, xmax=prop_high, xmin=prop_low, y = itemID)) +
xlab("Probability of other issues") +
ylab("")
ggsave("plots/issues_other_full.jpeg", width=10, height=50, units = "in", limitsize = F, dpi=300, plot=plot_others_full)
# Save all results in a big table
fit %>%
recover_types(df_for_stan %>%
mutate(users = userID, items=itemID,
issues=issue, report=report, .keep="unused")) %>%
spread_draws(item_issues_proba_true_issue[items, issues]) %>%
mutate(items = as_factor(items)) %>%
mutate(nootropic=items, issue = issues) %>%
group_by(nootropic, issue) %>%
summarise(prop = median(item_issues_proba_true_issue),
prop_low = quantile(item_issues_proba_true_issue, 0.05),
prop_high = quantile(item_issues_proba_true_issue, 0.95)) %>%
ungroup() %>%
mutate(variant="stan model") %>%
select(nootropic, issue, variant, prop, prop_low, prop_high) %>%
bind_rows(estimates) %>%
write_csv("issues_summary.csv")