Analysis walkthrough for the paper
Zettersten, M., & Saffran, J. (accepted). Sampling to learn words: Adults and children sample words that reduce referential ambiguity. Developmental Science.
For more background information and the underlying data, visit the OSF repository for this report: OSF site
See crossact_codebook for information about individual columns.
d <- read.csv(data_path)Overview over exclusions in each experiment.
##exclusions
exclusions <-  d %>%
  group_by(subject,experiment_name) %>%
  summarize(Exclude=Exclude[1]) %>%
  ungroup()  %>%
  group_by(experiment_name) %>%
  summarize(N=n(),num_exclusions=sum(Exclude!="N"))
kable(exclusions)| experiment_name | N | num_exclusions | 
|---|---|---|
| Experiment 1 | 31 | 3 | 
| Experiment 2 | 40 | 2 | 
| Experiment 3 | 58 | 2 | 
| Experiment S1 | 62 | 0 | 
#remove excluded participants
d <- d %>%
  filter(Exclude=="N")Overview over demographic characteristics of each sample.
##demographics (exclusions filtered)
demographics <- d %>%
  filter(Exclude=="N") %>%
  group_by(subject,age_group,experiment_name) %>%
  summarize(Gender=Gender[1],Age=Age[1],L1_english=L1_english[1],languages_besides_english_yn=languages_besides_english_yn[1],L1=L1[1],L1percent=L1percent[1]) %>%
  ungroup() %>%
  group_by(age_group,experiment_name)  %>%
  summarize(
    N=n(),
    gender_f=sum(Gender=="female"),
    mean_age=round(mean(Age,na.rm=T),2),
    sd_age=round(sd(Age,na.rm=T),2),
    min_age=round(min(Age,na.rm=T),2),
    max_age=round(max(Age,na.rm=T),2),
    native_english=ifelse(is.na(sum(L1_english=="English")),sum(L1=="English",na.rm=T),sum(L1_english=="English",na.rm=T)),
    languages_besides_english=ifelse(age_group[1]=="adults",NA,sum(languages_besides_english_yn=="Yes",na.rm=T)),
    monolingual=ifelse(age_group[1]=="adults",NA,sum(L1percent>=90,na.rm=T))
  )demographics %>%
  filter(experiment_name=="Experiment 1") %>%
  kable()| age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual | 
|---|---|---|---|---|---|---|---|---|---|---|
| adults | Experiment 1 | 28 | 8 | 31.39 | 7.25 | 19 | 48 | 28 | NA | NA | 
demographics %>%
  filter(experiment_name=="Experiment 2") %>%
  kable()| age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual | 
|---|---|---|---|---|---|---|---|---|---|---|
| kids | Experiment 2 | 38 | 19 | 5.9 | 1.19 | 4.1 | 8.12 | 38 | 6 | 32 | 
d %>%
  filter(Exclude=="N"&experiment_name=="Experiment 2") %>%
  group_by(subject) %>%
  summarize(hispanic=hispanic[1],ethnicity=ethnicity[1]) %>%
  ungroup() %>%
  group_by(hispanic,ethnicity) %>%
  summarize(count=n()) %>%
  kable()| hispanic | ethnicity | count | 
|---|---|---|
| No | Asian | 1 | 
| No | Asian,White | 2 | 
| No | Black or African American,White | 1 | 
| No | Other | 1 | 
| No | White | 30 | 
| Yes | Other | 1 | 
| Yes | White | 1 | 
| NA | White | 1 | 
Note that we are currently missing some demographic information (ethnicity & language) for 6 of the 56 participants, due to experimenter error.
demographics %>%
  filter(experiment_name=="Experiment 3") %>%
  kable()| age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual | 
|---|---|---|---|---|---|---|---|---|---|---|
| kids | Experiment 3 | 56 | 33 | 5.53 | 1.18 | 3.29 | 7.88 | 50 | 17 | 38 | 
d %>%
  filter(Exclude=="N"&experiment_name=="Experiment 3") %>%
  group_by(subject) %>%
  summarize(hispanic=hispanic[1],ethnicity=ethnicity[1]) %>%
  ungroup() %>%
  group_by(hispanic,ethnicity) %>%
  summarize(count=n()) %>%
  kable()| hispanic | ethnicity | count | 
|---|---|---|
| No | Asian | 8 | 
| No | Asian,Black or African American | 1 | 
| No | Asian,White | 1 | 
| No | Black or African American | 1 | 
| No | Black or African American,White | 1 | 
| No | Black or African American,White,Other | 1 | 
| No | Other | 1 | 
| No | White | 33 | 
| Yes | Other | 1 | 
| Yes | White | 1 | 
| NA | White | 1 | 
| NA | NA | 6 | 
demographics %>%
  filter(experiment_name=="Experiment S1") %>%
  kable()| age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual | 
|---|---|---|---|---|---|---|---|---|---|---|
| adults | Experiment S1 | 62 | 27 | 19.13 | 1.01 | 18 | 22 | 56 | NA | NA | 
First, we summarize sampling and test behavior by participant and store these objects for later plotting and analysis.
#Summarize sampling and test behavior by subject
#used in later analyses and plotting
#selections by subject
subj_selection <- d %>%
  filter(trialType=="selection") %>%
  group_by(experiment_name,ambiguity_condition,subject) %>%
  summarize(
    N=n(),
    prop_ambig_selection=sum(selectionType!="low")/N,
    num_ambig_selection=sum(selectionType!="low"))
subj_selection %>%
  arrange(experiment_name,ambiguity_condition,subject) %>%
  DT::datatable()#test performance by subject
subj_test <- d %>%
  filter(trialType=="test") %>%
  group_by(experiment_name,ambiguity_condition,subject) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_test %>%
  arrange(experiment_name,ambiguity_condition,subject) %>%
  DT::datatable()#test performance split by item type (ambiguous vs. non-ambiguous)
subj_test_item <- d %>%
  filter(trialType=="test") %>%
  group_by(experiment_name,ambiguity_condition,subject,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_test_item %>%
  arrange(experiment_name,ambiguity_condition,subject) %>%
  DT::datatable()Analysis of learners’ sampling preferences.
Plot is based on estimates from the logistic mixed-effects model
#create data frame with model predictions
m <- glmer(isAmbiguous~(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
model_pred <- data.frame(ambiguity_condition="ambiguous")
pY <- predictSE(m,model_pred, type="response")
model_pred <- model_pred %>%
  mutate(
    experiment_name= "Experiment 1",
    prop_ambiguous = pY$fit,
    prop_ambiguous_lower_ci = pY$fit - 1.96*pY$se.fit,
    prop_ambiguous_upper_ci = pY$fit + 1.96*pY$se.fit)
#create plot
p_exp1_sampling <- ggplot(subset(model_pred,ambiguity_condition=="ambiguous"),aes(x=experiment_name,y=prop_ambiguous,color=ambiguity_condition,fill=ambiguity_condition))+
  geom_bar(stat="identity",size=2.5,fill="white",width=0.5)+
  geom_dotplot(data=subset(subj_selection,experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous"), aes(y=prop_ambig_selection),binaxis="y",stackdir="center",alpha=0.5,dotsize=0.6)+
  geom_errorbar(aes(ymin=prop_ambiguous_lower_ci,ymax=prop_ambiguous_upper_ci),width=0,size=1.2)+
  ylab("Probability of \nAmbiguous Selection")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  theme_classic(base_size=24)+
  theme(legend.position="none")+
  scale_x_discrete(name="Experiment 1")+
  theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
p_exp1_sampling#descriptives
subj_summary_1 <-  subj_selection %>%
  filter(experiment_name=="Experiment 1") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_1)| ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci | 
|---|---|---|---|---|
| ambiguous | 28 | 0.625 | 0.5084928 | 0.7415072 | 
m <- glmer(isAmbiguous~(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 1" & trialType == "selection" &  
##     ambiguity_condition == "ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    150.9    159.1    -72.5    144.9      109 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4998 -0.9396  0.5155  0.6668  1.0642 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.845    0.9193  
##  choiceImage (Intercept) 0.000    0.0000  
## Number of obs: 112, groups:  subject, 28; choiceImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.6155     0.2845   2.163   0.0305 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1confint(m,method="Wald")[3,]##      2.5 %     97.5 % 
## 0.05777627 1.17315329Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of a by-item random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.
wilcox.test(filter(subj_selection, experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 1" & ambiguity_condition ==     "ambiguous")$prop_ambig_selection
## V = 151.5, p-value = 0.02026
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000775 0.8750528
## sample estimates:
## (pseudo)median 
##       0.625024Analysis of learners’ test performance.
## split by item, within-subjects corrected CIs
subj_summary_test_item_1 <-  summarySEwithin(
  filter(
    subj_test_item,
    experiment_name=="Experiment 1"),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) %>%
  select(-accuracy_norm,-sd,-se,-ci)
## by-item test plot
ggplot(subj_summary_test_item_1,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_point(data=filter(subj_test_item,experiment_name=="Experiment 1"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","fully\nambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))## descriptives
subj_summary_test_1 <-  subj_test %>%
  filter(experiment_name=="Experiment 1") %>%
  group_by(experiment_name,ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_correct=mean(accuracy,na.rm=T),
    ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
    prop_correct_lower_ci=prop_correct-ci,
    prop_correct_upper_ci=prop_correct+ci
  ) %>%
  select(-ci)
kable(subj_summary_test_1)| experiment_name | ambiguity_condition | N | prop_correct | prop_correct_lower_ci | prop_correct_upper_ci | 
|---|---|---|---|---|---|
| Experiment 1 | ambiguous | 28 | 0.65625 | 0.5265768 | 0.7859232 | 
kable(subj_summary_test_item_1)| experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci | 
|---|---|---|---|---|---|---|
| Experiment 1 | ambiguous | no | 28 | 0.6785714 | 0.5909601 | 0.7661827 | 
| Experiment 1 | ambiguous | yes | 28 | 0.6339286 | 0.5463173 | 0.7215399 | 
## testing overall accuracy against chance
d$offset.125 <- 1/8
m <- glmer(isRight~offset(logit(offset.125))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 1"&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## isRight ~ offset(logit(offset.125)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 1" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    236.0    246.3   -115.0    230.0      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1422 -0.4954  0.2084  0.5398  2.0369 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 5.30551  2.3034  
##  targetImage (Intercept) 0.09789  0.3129  
## Number of obs: 224, groups:  subject, 28; targetImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.1893     0.5097   6.258 3.91e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1## testing difference between items
d$targetIsAmbiguousC <- ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==1,0.5,
                                        ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==0,-0.5,NA))
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment 1")&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment 1") & trialType ==  
##     "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    233.5    253.9   -110.7    221.5      218 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0032 -0.4774  0.0571  0.3672  1.7067 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr 
##  subject     (Intercept)        9.14612  3.024         
##              targetIsAmbiguousC 8.80532  2.967    -0.98
##  targetImage (Intercept)        0.01905  0.138         
## Number of obs: 224, groups:  subject, 28; targetImage, 8
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          1.6280     0.7177   2.268   0.0233 *
## targetIsAmbiguousC  -1.5652     0.9516  -1.645   0.1000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC -0.823confint(m, method="Wald")[5:6,]##                         2.5 %    97.5 %
## (Intercept)         0.2213282 3.0345876
## targetIsAmbiguousC -3.4303089 0.2999958#join sampling and test
subj_selection <- subj_selection %>%
  left_join(subj_test,by=c("subject","experiment_name","ambiguity_condition"))
#correlation between preference for sampling ambiguous items and test performance
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==     "Experiment 1")$accuracy and     "Experiment 1")$prop_ambig_selection
## t = 3.6465, df = 26, p-value = 0.001167
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2664476 0.7845213
## sample estimates:
##       cor 
## 0.5817003Plot correlation between sampling preference and test accuracy
p_exp1_sampling_test <- ggplot(filter(subj_selection,experiment_name=="Experiment 1"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")
p_exp1_sampling_testAnalysis of learners’ sampling preferences.
ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
  scale_fill_brewer(palette="Set1",direction=-1)+
  scale_color_brewer(palette="Set1",direction=-1)+
  geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
  geom_vline(xintercept=4/3,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16),
        legend.position="none")+
  ylab("Number of subjects")+
  xlab("Number of ambiguous selections")+
  #geom_density(aes(group=1, y=..count../1.3))+
  scale_x_continuous(breaks=c(0,1,2,3,4), limits=c(-0.5,4))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30), limits=c(0,20))#descriptives
subj_summary_2 <-  subj_selection %>%
  filter(experiment_name=="Experiment 2") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_2)| ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci | 
|---|---|---|---|---|
| ambiguous | 38 | 0.3289474 | 0.2712987 | 0.386596 | 
d$offset.33 <- 1/3
m <- glmer(isAmbiguous~offset(logit(offset.33))+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ offset(logit(offset.33)) + (1 | subject) + (1 |  
##     choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    198.6    207.6    -96.3    192.6      149 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7001 -0.7001 -0.7001  1.4283  1.4283 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0        0       
##  choiceImage (Intercept) 0        0       
## Number of obs: 152, groups:  subject, 38; choiceImage, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.0198     0.1726  -0.115    0.909confint(m, method="Wald")[3,]##      2.5 %     97.5 % 
## -0.3581671  0.3185617Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of a by-item random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.
wilcox.test(filter(subj_selection,experiment_name=="Experiment 2")$prop_ambig_selection,mu=1/3, conf.int=T, conf.level=0.95)## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 2")$prop_ambig_selection
## V = 409, p-value = 0.5734
## alternative hypothesis: true location is not equal to 0.3333333
## 95 percent confidence interval:
##  0.2500390 0.3750141
## sample estimates:
## (pseudo)median 
##      0.3749301#predict ambiguous selections from age
m <- glmer(isAmbiguous~Age+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    200.6    212.7    -96.3    192.6      148 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7034 -0.7006 -0.6989  1.4260  1.4334 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0        0       
##  choiceImage (Intercept) 0        0       
## Number of obs: 152, groups:  subject, 38; choiceImage, 6
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.737329   0.881389  -0.837    0.403
## Age          0.004131   0.146437   0.028    0.977
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.981confint(m, method="Wald")[3:4,]##                  2.5 %    97.5 %
## (Intercept) -2.4648196 0.9901613
## Age         -0.2828792 0.2911415#plot model predictions
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=T),max(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=T),by=0.1))
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
  geom_violinh(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=F)+
  geom_jitter(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
  geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
  geom_hline(yintercept=1/3,linetype="dotted")+
  theme_classic(base_size=16)+
  xlab("Age (in years)")+
  scale_x_continuous(breaks=c(3,4,5,6,7,8))+
  ylab("Proportion of ambiguous selections")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))#summarize across participants by item type
subj_summary_item_2 <- summarySEwithin(
  filter(
    subj_test_item,
    (experiment_name=="Experiment 2")),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) %>%
  select(-accuracy_norm,-sd,-se,-ci)
## create plot
ggplot(subj_summary_item_2,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_dotplot(data=filter(subj_test_item,experiment_name=="Experiment 2"),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/6,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))##descriptives
#overall
subj_summary_test_2 <-  subj_test %>%
  filter(experiment_name=="Experiment 2") %>%
  group_by(experiment_name,ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_correct=mean(accuracy,na.rm=T),
    ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
    prop_correct_lower_ci=prop_correct-ci,
    prop_correct_upper_ci=prop_correct+ci
  ) %>%
  select(-ci)
kable(subj_summary_test_2)| experiment_name | ambiguity_condition | N | prop_correct | prop_correct_lower_ci | prop_correct_upper_ci | 
|---|---|---|---|---|---|
| Experiment 2 | ambiguous | 38 | 0.3859649 | 0.3073429 | 0.4645869 | 
#by item type
subj_summary_item_2 %>%
  kable()| experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci | 
|---|---|---|---|---|---|---|
| Experiment 2 | ambiguous | no | 38 | 0.3355263 | 0.2484583 | 0.4225943 | 
| Experiment 2 | ambiguous | yes | 38 | 0.4868421 | 0.3997741 | 0.5739101 | 
## testing overall accuracy against chance
d$offset.17 <- 1/6
m <- glmer(isRight~offset(logit(offset.17))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 2"&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ offset(logit(offset.17)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 2" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    306.6    316.9   -150.3    300.6      224 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0598 -0.7565 -0.6739  1.0550  1.4839 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.3354   0.5791  
##  targetImage (Intercept) 0.0000   0.0000  
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1131     0.1723   6.461 1.04e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1## accuracy by item type
##logistic mixed=effects model
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 2"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    307.5    328.0   -147.7    295.5      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3639 -0.7441 -0.5889  1.0369  1.6980 
## 
## Random effects:
##  Groups      Name               Variance  Std.Dev. Corr
##  subject     (Intercept)        0.3840684 0.61973      
##              targetIsAmbiguousC 0.0001614 0.01271  1.00
##  targetImage (Intercept)        0.0000000 0.00000      
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -0.3970     0.1824  -2.176   0.0296 *
## targetIsAmbiguousC   0.6795     0.3046   2.231   0.0257 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC 0.209confint(m, method="Wald")[5:6,]##                          2.5 %      97.5 %
## (Intercept)        -0.75450961 -0.03940061
## targetIsAmbiguousC  0.08253043  1.27644793Which items (ambiguous vs. disambiguated) do participants choose on each test trial type? Here, we asked what proportion of the time participants select one of the two ambiguous items when the target label is for an ambiguous vs. a disambiguated item (regardless of accuracy).
## by choice type
# Which items (ambiguous vs. disambiguated) do participants choose on each test trial type?
d$testChoiceType <- ifelse(d$trialType=="test"&(as.character(d$choiceImage)==d$High1|as.character(d$choiceImage)==d$High2),"ambiguous",
                           ifelse(d$trialType=="test","disambiguated",NA))
#summarize choice tendency by participant
subj_test_choiceType_2 <- d %>%
  filter(trialType=="test"&experiment_name  == "Experiment 2") %>%
  group_by(subject,experiment_name,ambiguity_condition,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    ambiguous_choice=mean(testChoiceType=="ambiguous",na.rm=T))
#summarize across participants
subj_summary_choiceType_2 <- summarySEwithin(
  subj_test_choiceType_2,
  "ambiguous_choice",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = ambiguous_choice - ci,
    upper_ci = ambiguous_choice + ci
  ) %>%
  select(-ambiguous_choice_norm,-sd,-se,-ci)
kable(subj_summary_choiceType_2)| experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | ambiguous_choice | lower_ci | upper_ci | 
|---|---|---|---|---|---|---|
| Experiment 2 | ambiguous | no | 38 | 0.1842105 | 0.0798448 | 0.2885762 | 
| Experiment 2 | ambiguous | yes | 38 | 0.6184211 | 0.5140554 | 0.7227867 | 
##plot
ggplot(subj_summary_choiceType_2,aes(x=targetIsAmbiguousYN,y=ambiguous_choice,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_dotplot(data=subj_test_choiceType_2,alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","ambiguous"))+
  ylab("Proportion Ambiguous Items Selected")+
  geom_hline(yintercept=1/3,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))#summarize test accuracy by choice
subj_test_choice_2 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 2")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T)) 
subj_summary_choice_2 <- summarySEwithin(
  subj_test_choice_2,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )
subj_summary_choice_2 %>%
  select(-accuracy_norm, -sd,-se,-ci) %>%
  kable()| experiment_name | ambiguity_condition | chosen | N | accuracy | lower_ci | upper_ci | chosen_factor | 
|---|---|---|---|---|---|---|---|
| Experiment 2 | ambiguous | 0 | 38 | 0.2675439 | 0.1621150 | 0.3729727 | NOT SAMPLED | 
| Experiment 2 | ambiguous | 1 | 38 | 0.4517544 | 0.3463255 | 0.5571833 | SAMPLED | 
subj_test_item_choice_2 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 2")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_summary_item_choice_2 <- summarySEwithin(
  subj_test_item_choice_2,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen","targetIsAmbiguousYN"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )
ggplot(subj_summary_item_choice_2,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))+
  facet_wrap(~chosen_factor)d$chosenC <- ifelse(!is.na(d$chosen)&d$chosen==0,-0.5,
            ifelse(!is.na(d$chosen)&d$chosen==1,0.5,NA))
m <- glmer(isRight~targetIsAmbiguousC*chosenC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 2"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    302.2    329.6   -143.1    286.2      219 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5720 -0.7516 -0.4534  0.9339  2.4960 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr 
##  subject     (Intercept)        0.44890  0.6700        
##              targetIsAmbiguousC 0.03932  0.1983   -1.00
##  targetImage (Intercept)        0.00000  0.0000        
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                 -0.5662     0.2014  -2.811  0.00494 **
## targetIsAmbiguousC           0.8276     0.3441   2.405  0.01618 * 
## chosenC                      0.8896     0.3349   2.656  0.00790 **
## targetIsAmbiguousC:chosenC  -0.4927     0.6997  -0.704  0.48135   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC chosnC
## trgtIsAmbgC  0.048              
## chosenC     -0.330  0.037       
## trgtIsAmC:C  0.042 -0.412  0.158confint(m, method="Wald")[5:8,]##                                 2.5 %    97.5 %
## (Intercept)                -0.9610699 -0.171419
## targetIsAmbiguousC          0.1530674  1.502058
## chosenC                     0.2332100  1.545923
## targetIsAmbiguousC:chosenC -1.8640073  0.878679## preference for sampling ambiguous items and test performance
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 2")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 2")$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==     "Experiment 2")$accuracy and     "Experiment 2")$prop_ambig_selection
## t = 1.3557, df = 36, p-value = 0.1836
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1068138  0.5045310
## sample estimates:
##       cor 
## 0.2203953##plot
ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")Analysis of learners’ sampling preferences.
ggplot(filter(subj_selection,experiment_name=="Experiment 3"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
  scale_fill_brewer(palette="Set1",direction=-1)+
  scale_color_brewer(palette="Set1",direction=-1)+
  geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16),
        legend.position="none")+
  ylab("Number of subjects")+
  xlab("Number of ambiguous selections")+
  scale_x_continuous(breaks=c(0,1,2))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30))#descriptives
subj_summary_3 <-  subj_selection %>%
  filter(experiment_name=="Experiment 3") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_3)| ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci | 
|---|---|---|---|---|
| ambiguous_me | 56 | 0.6339286 | 0.5436849 | 0.7241723 | 
m=glmer(isAmbiguous ~ 1+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ 1 + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    153.1    161.3    -73.6    147.1      109 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3189 -1.3126  0.7582  0.7601  0.7618 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  subject     (Intercept) 2.663e-15 5.160e-08
##  choiceImage (Intercept) 1.529e-03 3.911e-02
## Number of obs: 112, groups:  subject, 56; choiceImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.5498     0.2031   2.707  0.00679 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1confint(m,method="Wald")[3,]##     2.5 %    97.5 % 
## 0.1517166 0.9479760Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of a by-item random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.
wilcox.test(filter(subj_selection,experiment_name=="Experiment 3")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 3")$prop_ambig_selection
## V = 330, p-value = 0.005553
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000378 1.0000000
## sample estimates:
## (pseudo)median 
##      0.9999326#predict ambiguous selections from age
m <- glmer(isAmbiguous~Age+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial)
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## 
##      AIC      BIC   logLik deviance df.resid 
##    148.4    159.3    -70.2    140.4      108 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2833 -1.0456  0.5800  0.7368  1.2408 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev.
##  subject     (Intercept) 1.081e-08 0.000104
##  choiceImage (Intercept) 2.176e-03 0.046648
## Number of obs: 112, groups:  subject, 56; choiceImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -1.9284     1.0046  -1.919   0.0549 .
## Age           0.4551     0.1837   2.478   0.0132 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.978confint(m, method="Wald")[3:4,]##                   2.5 %     97.5 %
## (Intercept) -3.89741195 0.04068293
## Age          0.09510293 0.81512510#plot model predictions
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 3"&trialType=="selection")$Age,na.rm=T),max(subset(d,experiment_name=="Experiment 3"&trialType=="selection")$Age,na.rm=T),by=0.1))
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
  geom_violinh(data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=F)+
  geom_jitter(data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
  geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  theme_classic(base_size=16)+
  xlab("Age (in years)")+
  scale_x_continuous(breaks=c(3,4,5,6,7,8))+
  ylab("Proportion of ambiguous selections")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))Analysis of learners’ test accuracy.
##plot by item type
#by item type
subj_summary_item_3 <- summarySEwithin(
  filter(
    subj_test_item,
    (experiment_name=="Experiment 3")),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  )
ggplot(subj_summary_item_3,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_dotplot(data=filter(subj_test_item,experiment_name=="Experiment 3"),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("mutual exclusivity","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("mutual exclusivity","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("mutual exclusivity","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))## descriptives
subj_summary_test_3 <-  subj_test %>%
  filter(experiment_name=="Experiment 3") %>%
  group_by(experiment_name,ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_correct=mean(accuracy,na.rm=T),
    ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
    prop_correct_lower_ci=prop_correct-ci,
    prop_correct_upper_ci=prop_correct+ci)
subj_summary_test_3 %>%
  select(-ci) %>%
  kable()| experiment_name | ambiguity_condition | N | prop_correct | prop_correct_lower_ci | prop_correct_upper_ci | 
|---|---|---|---|---|---|
| Experiment 3 | ambiguous_me | 56 | 0.5758929 | 0.4839003 | 0.6678855 | 
subj_summary_item_3 %>%
  select(-accuracy_norm,-se,-sd,-ci) %>%
  kable()| experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci | 
|---|---|---|---|---|---|---|
| Experiment 3 | ambiguous_me | no | 56 | 0.6160714 | 0.5280317 | 0.7041111 | 
| Experiment 3 | ambiguous_me | yes | 56 | 0.5357143 | 0.4476746 | 0.6237540 | 
##logistic mixed-effects model
d$offset.25 <- 1/4
#overall
m <- glmer(isRight~offset(logit(offset.25))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 3"&trialType=="test"),family=binomial)
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ offset(logit(offset.25)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    289.9    300.2   -142.0    283.9      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6324 -0.6345  0.3944  0.6126  1.2762 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 2.17173  1.4737  
##  targetImage (Intercept) 0.09708  0.3116  
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.5272     0.3007   5.079 3.79e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1d$offset.25 <- 1/4
##logistic mixed-effects model
#by item type
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 3"&trialType=="test"),family=binomial)
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    293.5    314.0   -140.8    281.5      218 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7428 -0.6127  0.3710  0.5865  1.3274 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr
##  subject     (Intercept)        2.5206   1.5876       
##              targetIsAmbiguousC 0.7476   0.8646   0.39
##  targetImage (Intercept)        0.1001   0.3164       
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)          0.4427     0.3168   1.397    0.162
## targetIsAmbiguousC  -0.4392     0.3615  -1.215    0.224
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC 0.064confint(m,method="Wald")[5:6,]##                         2.5 %    97.5 %
## (Intercept)        -0.1781934 1.0636353
## targetIsAmbiguousC -1.1478162 0.2693513#summarize test accuracy by choice
subj_test_choice_3 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 3")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_summary_choice_3 <- summarySEwithin(
  subj_test_choice_3,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )
subj_summary_choice_3 %>%
  select(-accuracy_norm, -sd,-se,-ci) %>%
  kable()| experiment_name | ambiguity_condition | chosen | N | accuracy | lower_ci | upper_ci | chosen_factor | 
|---|---|---|---|---|---|---|---|
| Experiment 3 | ambiguous_me | 0 | 56 | 0.5595238 | 0.4653227 | 0.6537249 | NOT SAMPLED | 
| Experiment 3 | ambiguous_me | 1 | 56 | 0.5982143 | 0.5040132 | 0.6924154 | SAMPLED | 
subj_test_item_choice_3 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 3")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_summary_item_choice_3 <- summarySEwithin(
  subj_test_item_choice_3,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen","targetIsAmbiguousYN"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )
ggplot(subj_summary_item_choice_3,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("mutual exclusivity","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("mutual exclusivity","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("mutual exclusivity","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))+
  facet_wrap(~chosen_factor)m <- glmer(isRight~targetIsAmbiguousC*chosenC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 3"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 3")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    296.5    323.8   -140.3    280.5      216 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9202 -0.6319  0.3743  0.5870  1.5328 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr
##  subject     (Intercept)        2.47115  1.5720       
##              targetIsAmbiguousC 0.52951  0.7277   0.38
##  targetImage (Intercept)        0.09831  0.3135       
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                  0.4250     0.3188   1.333    0.182
## targetIsAmbiguousC          -0.5291     0.3671  -1.441    0.150
## chosenC                      0.3414     0.3474   0.983    0.326
## targetIsAmbiguousC:chosenC   0.2390     0.8039   0.297    0.766
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC chosnC
## trgtIsAmbgC  0.045              
## chosenC      0.001 -0.255       
## trgtIsAmC:C -0.164  0.003  0.031confint(m, method="Wald")[5:8,]##                                 2.5 %    97.5 %
## (Intercept)                -0.1997402 1.0497618
## targetIsAmbiguousC         -1.2487101 0.1904759
## chosenC                    -0.3395635 1.0223851
## targetIsAmbiguousC:chosenC -1.3365340 1.8145833##relationship between sampling preference for ambiguous items and test accuracy
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous_me"&experiment_name=="Experiment 3")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous_me"&experiment_name=="Experiment 3")$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous_me" &  and subset(subj_selection, ambiguity_condition == "ambiguous_me" &     experiment_name == "Experiment 3")$accuracy and     experiment_name == "Experiment 3")$prop_ambig_selection
## t = 0.49839, df = 54, p-value = 0.6202
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1987701  0.3247892
## sample estimates:
##        cor 
## 0.06766648##plot
ggplot(filter(subj_selection,experiment_name=="Experiment 3"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous_me"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")Analysis of learners’ sampling preferences.
## Plot
d$conditionFull <- ifelse(d$ambiguity_condition=="ambiguous",0,
                          ifelse(d$ambiguity_condition=="partially ambiguous",-1,NA))
m <- glmer(isAmbiguous~conditionFull+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
model_pred <- data.frame(conditionFull=c(-1,0),ambiguity_condition=c("partially ambiguous","ambiguous"))
pY <- predictSE(m,model_pred, type="response")
model_pred <- model_pred %>%
  mutate(
    prop_ambiguous = pY$fit,
    prop_ambiguous_lower_ci = pY$fit - 1.96*pY$se.fit,
    prop_ambiguous_upper_ci = pY$fit + 1.96*pY$se.fit)
p_expS1_sampling <- ggplot(model_pred,aes(x=ambiguity_condition,y=prop_ambiguous,color=ambiguity_condition,fill=ambiguity_condition))+
  geom_bar(stat="identity",size=2.5,fill="white",width=0.5)+
  geom_dotplot(data=subset(subj_selection,experiment_name=="Experiment S1"), aes(y=prop_ambig_selection),binaxis="y",stackdir="center",alpha=0.5,dotsize=0.6)+
  geom_errorbar(aes(ymin=prop_ambiguous_lower_ci,ymax=prop_ambiguous_upper_ci),width=0,size=1.2)+
  ylab("Probability of \nAmbiguous Selection")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  theme_classic(base_size=18)+
  theme(legend.position="none")+
  scale_x_discrete(name="Condition")
p_expS1_sampling#descriptives
subj_summary_s1 <-  subj_selection %>%
  filter(experiment_name=="Experiment S1") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_s1)| ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci | 
|---|---|---|---|---|
| ambiguous | 28 | 0.6428571 | 0.5359183 | 0.7497960 | 
| partially ambiguous | 34 | 0.4779412 | 0.3910626 | 0.5648197 | 
Estimating the difference in preference for selecting ambiguous items in the Partially Ambiguous condition vs. the Fully Ambiguous condition.
d$conditionC <- ifelse(d$ambiguity_condition=="ambiguous",0.5, 
                       ifelse(d$ambiguity_condition=="partially ambiguous",-0.5,NA))
m <- glmer(isAmbiguous~conditionC+(1|subject)+(1|choiceImage),data=subset(d,(experiment_name=="Experiment S1")&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionC + (1 | subject) + (1 | choiceImage)
##    Data: subset(d, (experiment_name == "Experiment S1") & trialType ==  
##     "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  subject     (Intercept) 1.151e-01 3.393e-01
##  choiceImage (Intercept) 8.752e-10 2.958e-05
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.2569     0.1401   1.833   0.0668 .
## conditionC    0.6955     0.2815   2.471   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## conditionC 0.145confint(m,method="Wald")[3:4,]##                   2.5 %    97.5 %
## (Intercept) -0.01777231 0.5315834
## conditionC   0.14384157 1.2472401Estimating the preference for (fully) ambiguous items by re-centering the model on the fully ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Fully Ambiguous condition.
## Fully Ambiguous Condition
d$conditionFull <- ifelse(d$ambiguity_condition=="ambiguous",0,
                          ifelse(d$ambiguity_condition=="partially ambiguous",-1,NA))
m <- glmer(isAmbiguous~conditionFull+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionFull + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.1151   0.3393  
##  choiceImage (Intercept) 0.0000   0.0000  
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     0.6047     0.2125   2.845  0.00444 **
## conditionFull   0.6955     0.2815   2.471  0.01347 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## conditinFll 0.758confint(m,method="Wald")[3:4,]##                   2.5 %   97.5 %
## (Intercept)   0.1881258 1.021231
## conditionFull 0.1438395 1.247238Estimating the preference for (partially) ambiguous items by re-centering the model on the partially ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Partially Ambiguous condition.
d$conditionPartial <- ifelse(d$ambiguity_condition=="ambiguous",1,
                            ifelse(d$ambiguity_condition=="partially ambiguous",0,NA))
m <- glmer(isAmbiguous~conditionPartial+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionPartial + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.1151   0.3393  
##  choiceImage (Intercept) 0.0000   0.0000  
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)      -0.09086    0.18365  -0.495   0.6208  
## conditionPartial  0.69553    0.28148   2.471   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## conditnPrtl -0.656confint(m,method="Wald")[3:4,]##                       2.5 %    97.5 %
## (Intercept)      -0.4508106 0.2690974
## conditionPartial  0.1438339 1.2472347##condition comparison
wilcox.test(subset(subj_selection , ambiguity_condition=="ambiguous"&experiment_name=="Experiment S1")$prop_ambig_selection,
            subset(subj_selection, ambiguity_condition=="partially ambiguous"&experiment_name=="Experiment S1")$prop_ambig_selection,
            conf.int=T, conf.level=0.95)## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "partially ambiguous" &     "Experiment S1")$prop_ambig_selection and     experiment_name == "Experiment S1")$prop_ambig_selection
## W = 631.5, p-value = 0.02256
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  2.599459e-05 2.500091e-01
## sample estimates:
## difference in location 
##              0.2499557##Fully Ambiguous Condition
wilcox.test(subset(subj_selection, experiment_name=="Experiment S1"&ambiguity_condition=="ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition ==     "ambiguous")$prop_ambig_selection
## V = 140, p-value = 0.01534
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000486 0.8750296
## sample estimates:
## (pseudo)median 
##       0.750061##Partially Ambiguous Condition
wilcox.test(subset(subj_selection, experiment_name=="Experiment S1"&ambiguity_condition=="partially ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition ==     "partially ambiguous")$prop_ambig_selection
## V = 100.5, p-value = 0.5901
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.2500428 0.6249926
## sample estimates:
## (pseudo)median 
##      0.4999225Analysis of learners’ test performance.
##summarize participant accuracy by test half
subj_test_half <- d %>%
  filter(trialType=="test"&experiment_name=="Experiment S1") %>%
  group_by(subject,experiment_name,ambiguity_condition, testHalf) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
##by item and test half
subj_test_half_item <- d %>%
  filter(trialType=="test"&experiment_name=="Experiment S1") %>%
  group_by(subject,experiment_name,ambiguity_condition, testHalf,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))#item summary within-subjects corrected CIs
subj_summary_item_testhalf_correctedCIs_s1 <- summarySEwithin(
  filter(
    subj_test_half_item,
    (experiment_name=="Experiment S1")),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("testHalf","targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block",testHalf,sep=" ")
  )
#Experiment S1 - test
p_expS1_fulltest <- ggplot(filter(subj_summary_item_testhalf_correctedCIs_s1,ambiguity_condition=="ambiguous"),aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_point(data=filter(subj_test_half_item,experiment_name=="Experiment S1"&ambiguity_condition=="ambiguous"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type\n\nFully Ambiguous Condition",
                   limits=c("no","yes"),
                   labels=c("disambig-\nuated","fully\nambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
  theme_classic(base_size=12)+
  theme(legend.position="none", axis.text.x=element_text(size=9))+
  facet_wrap(~testHalf_name)
p_expS1_partialtest <- ggplot(filter(subj_summary_item_testhalf_correctedCIs_s1, ambiguity_condition=="partially ambiguous"),aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_point(data=filter(subj_test_half_item,experiment_name=="Experiment S1"&ambiguity_condition=="partially ambiguous"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","partially\nambiguous"),
                     values=c("#4DAF4A","#377EB8"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","partially\nambiguous"),
                    values=c("#4DAF4A","#377EB8"))+
  scale_x_discrete(name = "Item Type\n\nPartially Ambiguous Condition",
                   limits=c("no","yes"),
                   labels=c("disambig-\nuated","partially\nambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
  theme_classic(base_size=12)+
  theme(legend.position="none", axis.text.x=element_text(size=9))+
  facet_wrap(~testHalf_name)
plot_grid(p_expS1_fulltest,p_expS1_partialtest, labels=c("A","B"),rel_widths=c(1,1),label_size=24,nrow=1)Split by test half.
## descriptives
subj_summary_test_s1 <-  summarySEwithin(
  subj_test_half,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("testHalf"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block",testHalf,sep=" ")
  ) %>%
  select(-accuracy_norm,-se,-sd,-ci)
  
kable(subj_summary_test_s1)| experiment_name | ambiguity_condition | testHalf | N | accuracy | lower_ci | upper_ci | testHalf_name | 
|---|---|---|---|---|---|---|---|
| Experiment S1 | ambiguous | 1 | 28 | 0.6517857 | 0.6118389 | 0.6917325 | Test Block 1 | 
| Experiment S1 | ambiguous | 2 | 28 | 0.7276786 | 0.6877318 | 0.7676254 | Test Block 2 | 
| Experiment S1 | partially ambiguous | 1 | 34 | 0.7683824 | 0.7248067 | 0.8119580 | Test Block 1 | 
| Experiment S1 | partially ambiguous | 2 | 34 | 0.7757353 | 0.7321596 | 0.8193110 | Test Block 2 | 
Split by test half.
## split by item
subj_summary_test_item_s1 <-  summarySEwithin(
  subj_test_half_item,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("testHalf","targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block",testHalf,sep=" ")
  ) %>%
  select(-accuracy_norm,-se,-sd,-ci)
kable(subj_summary_test_item_s1)| experiment_name | ambiguity_condition | testHalf | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci | testHalf_name | 
|---|---|---|---|---|---|---|---|---|
| Experiment S1 | ambiguous | 1 | no | 28 | 0.8392857 | 0.7786705 | 0.8999009 | Test Block 1 | 
| Experiment S1 | ambiguous | 1 | yes | 28 | 0.4642857 | 0.3776673 | 0.5509041 | Test Block 1 | 
| Experiment S1 | ambiguous | 2 | no | 28 | 0.8571429 | 0.7783393 | 0.9359464 | Test Block 2 | 
| Experiment S1 | ambiguous | 2 | yes | 28 | 0.5982143 | 0.5156617 | 0.6807669 | Test Block 2 | 
| Experiment S1 | partially ambiguous | 1 | no | 34 | 0.7500000 | 0.6896160 | 0.8103840 | Test Block 1 | 
| Experiment S1 | partially ambiguous | 1 | yes | 34 | 0.7867647 | 0.7272860 | 0.8462434 | Test Block 1 | 
| Experiment S1 | partially ambiguous | 2 | no | 34 | 0.7941176 | 0.7390598 | 0.8491754 | Test Block 2 | 
| Experiment S1 | partially ambiguous | 2 | yes | 34 | 0.7573529 | 0.6839887 | 0.8307171 | Test Block 2 | 
d$testHalfC <- ifelse(!is.na(d$testHalf) & d$testHalf==2,0.5,
                      ifelse(!is.na(d$testHalf) & d$testHalf==1,-0.5,NA))
#three-way interaction
m <- glmer(isRight~targetIsAmbiguousC*conditionC*testHalfC+(1+targetIsAmbiguousC*testHalfC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment S1")&trialType=="test"&ambiguity_condition!="non ambiguous"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * conditionC * testHalfC + (1 +  
##     targetIsAmbiguousC * testHalfC | subject) + (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment S1") & trialType ==  
##     "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    846.0    939.1   -404.0    808.0      973 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0148 -0.2249  0.1640  0.3500  4.4299 
## 
## Random effects:
##  Groups      Name                         Variance Std.Dev. Corr          
##  subject     (Intercept)                  5.4970   2.3446                 
##              targetIsAmbiguousC           1.0299   1.0148   0.02          
##              testHalfC                    0.7031   0.8385   0.99 0.19     
##              targetIsAmbiguousC:testHalfC 0.5055   0.7110   0.35 0.94 0.50
##  targetImage (Intercept)                  0.1513   0.3889                 
## Number of obs: 992, groups:  subject, 62; targetImage, 8
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                               2.0641     0.3900   5.292 1.21e-07
## targetIsAmbiguousC                       -1.3301     0.3950  -3.368 0.000758
## conditionC                               -0.7664     0.6784  -1.130 0.258643
## testHalfC                                 0.8217     0.3614   2.273 0.023007
## targetIsAmbiguousC:conditionC            -2.7515     0.5694  -4.832 1.35e-06
## targetIsAmbiguousC:testHalfC             -0.1361     0.6891  -0.197 0.843444
## conditionC:testHalfC                      0.1909     0.5104   0.374 0.708427
## targetIsAmbiguousC:conditionC:testHalfC   0.7522     0.9304   0.808 0.418824
##                                            
## (Intercept)                             ***
## targetIsAmbiguousC                      ***
## conditionC                                 
## testHalfC                               *  
## targetIsAmbiguousC:conditionC           ***
## targetIsAmbiguousC:testHalfC               
## conditionC:testHalfC                       
## targetIsAmbiguousC:conditionC:testHalfC    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC cndtnC tstHlC trIAC:C tIAC:H cnC:HC
## trgtIsAmbgC -0.174                                           
## conditionC   0.028 -0.129                                    
## testHalfC    0.447 -0.127  0.000                             
## trgtIsAmC:C -0.108 -0.019 -0.003 -0.067                      
## trgtIsAC:HC -0.031  0.418 -0.020 -0.306 -0.038               
## cndtnC:tsHC -0.014 -0.097  0.476 -0.077  0.034  -0.236       
## trgIAC:C:HC -0.039 -0.045  0.059 -0.247  0.260  -0.093  0.086confint(m, method="Wald")[12:19,]##                                              2.5 %     97.5 %
## (Intercept)                              1.2996085  2.8285636
## targetIsAmbiguousC                      -2.1042260 -0.5560018
## conditionC                              -2.0960427  0.5633383
## testHalfC                                0.1132606  1.5301136
## targetIsAmbiguousC:conditionC           -3.8675112 -1.6355179
## targetIsAmbiguousC:testHalfC            -1.4867027  1.2145210
## conditionC:testHalfC                    -0.8095147  1.1912735
## targetIsAmbiguousC:conditionC:testHalfC -1.0713783  2.5757980#test specifically test half increase for ambiguous items in the Fully Ambiguous condition
d$targetIsAmbiguous_ambiguous <- ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==1,0,
                                        ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==0,-1,NA))
m <- glmer(isRight~targetIsAmbiguous_ambiguous*conditionFull*testHalfC+(1+targetIsAmbiguous_ambiguous*testHalfC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment S1")&trialType=="test"&ambiguity_condition!="non ambiguous"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguous_ambiguous * conditionFull * testHalfC +  
##     (1 + targetIsAmbiguous_ambiguous * testHalfC | subject) +  
##     (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment S1") & trialType ==  
##     "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    846.0    939.1   -404.0    808.0      973 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0148 -0.2249  0.1640  0.3500  4.4299 
## 
## Random effects:
##  Groups      Name                                  Variance Std.Dev. Corr     
##  subject     (Intercept)                           5.8087   2.4101            
##              targetIsAmbiguous_ambiguous           1.0299   1.0148   0.23     
##              testHalfC                             1.1294   1.0627   0.97 0.46
##              targetIsAmbiguous_ambiguous:testHalfC 0.5055   0.7110   0.54 0.94
##  targetImage (Intercept)                           0.1513   0.3889            
##      
##      
##      
##      
##  0.73
##      
## Number of obs: 992, groups:  subject, 62; targetImage, 8
## 
## Fixed effects:
##                                                     Estimate Std. Error z value
## (Intercept)                                           0.3280     0.5258   0.624
## targetIsAmbiguous_ambiguous                          -2.7059     0.4825  -5.608
## conditionFull                                        -2.1421     0.7349  -2.915
## testHalfC                                             1.0371     0.4364   2.377
## targetIsAmbiguous_ambiguous:conditionFull            -2.7515     0.5694  -4.833
## targetIsAmbiguous_ambiguous:testHalfC                 0.2400     0.7947   0.302
## conditionFull:testHalfC                               0.5670     0.7195   0.788
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC   0.7522     0.9303   0.809
##                                                     Pr(>|z|)    
## (Intercept)                                          0.53276    
## targetIsAmbiguous_ambiguous                         2.05e-08 ***
## conditionFull                                        0.00356 ** 
## testHalfC                                            0.01747 *  
## targetIsAmbiguous_ambiguous:conditionFull           1.35e-06 ***
## targetIsAmbiguous_ambiguous:testHalfC                0.76263    
## conditionFull:testHalfC                              0.43070    
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC  0.41877    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIA_ cndtnF tstHlC trIA_:F tIA_:H cnF:HC
## trgtIsAmbg_ 0.236                                            
## conditinFll 0.640  0.123                                     
## testHalfC   0.498  0.161  0.300                              
## trgtIsAm_:F 0.181  0.575  0.384  0.073                       
## trgtIsA_:HC 0.133  0.346  0.062  0.481  0.119                
## cndtnFll:HC 0.232  0.033  0.422  0.467  0.192   0.217        
## trgIA_:F:HC 0.062  0.117  0.155  0.305  0.260   0.505  0.707confint(m, method="Wald")[12:19,]##                                                          2.5 %     97.5 %
## (Intercept)                                         -0.7025257  1.3584754
## targetIsAmbiguous_ambiguous                         -3.6515408 -1.7602106
## conditionFull                                       -3.5824604 -0.7017594
## testHalfC                                            0.1818284  1.8924395
## targetIsAmbiguous_ambiguous:conditionFull           -3.8674642 -1.6355665
## targetIsAmbiguous_ambiguous:testHalfC               -1.3175109  1.7975297
## conditionFull:testHalfC                             -0.8432513  1.9772172
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC -1.0711732  2.5755888Fully Ambiguous Condition - Test Block 1
#correlations Experiment S1: proportion ambiguous items selected and test accuracy
subj_test_half <- subj_test_half %>% 
  left_join(select(subj_selection,subject,prop_ambig_selection)) %>%
  mutate(
    testHalf_name = paste("Test Block",testHalf,sep=" "))
#Fully Ambiguous - test block 1
cor.test(subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==1)$accuracy,subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==1)$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==  and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==     1)$accuracy and     1)$prop_ambig_selection
## t = 2.5909, df = 26, p-value = 0.01549
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09617591 0.70664954
## sample estimates:
##       cor 
## 0.4529988Fully Ambiguous Condition - Test Block 2
#Fully Ambiguous - test block 2
cor.test(subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==2)$accuracy,subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==2)$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==  and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==     2)$accuracy and     2)$prop_ambig_selection
## t = 1.8848, df = 26, p-value = 0.07069
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03028924  0.63734347
## sample estimates:
##       cor 
## 0.3467055Partially Ambiguous Condition - Test Block 1
#Partially Ambiguous - test block 1
cor.test(subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==1)$accuracy,subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==1)$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half, ambiguity_condition == "partially ambiguous" &     testHalf == 1)$accuracy and     testHalf == 1)$prop_ambig_selection
## t = 0.55366, df = 32, p-value = 0.5837
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2489571  0.4216846
## sample estimates:
##        cor 
## 0.09740938Partially Ambiguous Condition - Test Block 2
#Partially Ambiguous - test block 2
cor.test(subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==2)$accuracy,subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==2)$prop_ambig_selection)## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half, ambiguity_condition == "partially ambiguous" &     testHalf == 2)$accuracy and     testHalf == 2)$prop_ambig_selection
## t = -0.67859, df = 32, p-value = 0.5023
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4395666  0.2282537
## sample estimates:
##        cor 
## -0.1191055#Experiment S1 - sampling-test
pS1_fullsampling_test <- ggplot(filter(subj_test_half,ambiguity_condition=="ambiguous"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  scale_color_manual(limits=c("ambiguous"),
                     labels=c("fully ambiguous"),
                     values=c("#E41A1C"))+
  #geom_point(position=position_jitter(width=.05,height=.0))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  #ylim(0,1.08)+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=12)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Proportion of Ambiguous Selections\n\nFully Ambiguous Condition")+
  facet_wrap(~testHalf_name)
pS1_partialsampling_test <- ggplot(filter(subj_test_half,ambiguity_condition=="partially ambiguous"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  scale_color_manual(limits=c("partially ambiguous"),
                     labels=c("partially ambiguous"),
                     values=c("#377EB8"))+
  #geom_point(position=position_jitter(width=.05,height=.0))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  #ylim(0,1.08)+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=12)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Proportion of Ambiguous Selections\n\nPartially Ambiguous Condition")+
  facet_wrap(~testHalf_name)
plot_grid(pS1_fullsampling_test,pS1_partialsampling_test,labels=c("A","B"),rel_widths=c(1,1),label_size=24,nrow=1)