Analysis walkthrough for the paper

Zettersten, M., & Saffran, J. (accepted). Sampling to learn words: Adults and children sample words that reduce referential ambiguity. Developmental Science.

For more background information and the underlying data, visit the OSF repository for this report: OSF site

See crossact_codebook for information about individual columns.

Reading in data and preliminary processing

d <- read.csv(data_path)

Exclusions

Overview over exclusions in each experiment.

##exclusions
exclusions <-  d %>%
  group_by(subject,experiment_name) %>%
  summarize(Exclude=Exclude[1]) %>%
  ungroup()  %>%
  group_by(experiment_name) %>%
  summarize(N=n(),num_exclusions=sum(Exclude!="N"))
kable(exclusions)
experiment_name N num_exclusions
Experiment 1 31 3
Experiment 2 40 2
Experiment 3 58 2
Experiment S1 62 0
#remove excluded participants
d <- d %>%
  filter(Exclude=="N")

Demographics

Overview over demographic characteristics of each sample.

##demographics (exclusions filtered)
demographics <- d %>%
  filter(Exclude=="N") %>%
  group_by(subject,age_group,experiment_name) %>%
  summarize(Gender=Gender[1],Age=Age[1],L1_english=L1_english[1],languages_besides_english_yn=languages_besides_english_yn[1],L1=L1[1],L1percent=L1percent[1]) %>%
  ungroup() %>%
  group_by(age_group,experiment_name)  %>%
  summarize(
    N=n(),
    gender_f=sum(Gender=="female"),
    mean_age=round(mean(Age,na.rm=T),2),
    sd_age=round(sd(Age,na.rm=T),2),
    min_age=round(min(Age,na.rm=T),2),
    max_age=round(max(Age,na.rm=T),2),
    native_english=ifelse(is.na(sum(L1_english=="English")),sum(L1=="English",na.rm=T),sum(L1_english=="English",na.rm=T)),
    languages_besides_english=ifelse(age_group[1]=="adults",NA,sum(languages_besides_english_yn=="Yes",na.rm=T)),
    monolingual=ifelse(age_group[1]=="adults",NA,sum(L1percent>=90,na.rm=T))
  )

Experiment 1

demographics %>%
  filter(experiment_name=="Experiment 1") %>%
  kable()
age_group experiment_name N gender_f mean_age sd_age min_age max_age native_english languages_besides_english monolingual
adults Experiment 1 28 8 31.39 7.25 19 48 28 NA NA

Experiment 2

Gender, Age, Native Language

demographics %>%
  filter(experiment_name=="Experiment 2") %>%
  kable()
age_group experiment_name N gender_f mean_age sd_age min_age max_age native_english languages_besides_english monolingual
kids Experiment 2 38 19 5.9 1.19 4.1 8.12 38 6 32

Ethnicity

d %>%
  filter(Exclude=="N"&experiment_name=="Experiment 2") %>%
  group_by(subject) %>%
  summarize(hispanic=hispanic[1],ethnicity=ethnicity[1]) %>%
  ungroup() %>%
  group_by(hispanic,ethnicity) %>%
  summarize(count=n()) %>%
  kable()
hispanic ethnicity count
No Asian 1
No Asian,White 2
No Black or African American,White 1
No Other 1
No White 30
Yes Other 1
Yes White 1
NA White 1

Experiment 3

Note that we are currently missing some demographic information (ethnicity & language) for 6 of the 56 participants, due to experimenter error.

Gender, Age, Native Language

demographics %>%
  filter(experiment_name=="Experiment 3") %>%
  kable()
age_group experiment_name N gender_f mean_age sd_age min_age max_age native_english languages_besides_english monolingual
kids Experiment 3 56 33 5.53 1.18 3.29 7.88 50 17 38

Ethnicity

d %>%
  filter(Exclude=="N"&experiment_name=="Experiment 3") %>%
  group_by(subject) %>%
  summarize(hispanic=hispanic[1],ethnicity=ethnicity[1]) %>%
  ungroup() %>%
  group_by(hispanic,ethnicity) %>%
  summarize(count=n()) %>%
  kable()
hispanic ethnicity count
No Asian 8
No Asian,Black or African American 1
No Asian,White 1
No Black or African American 1
No Black or African American,White 1
No Black or African American,White,Other 1
No Other 1
No White 33
Yes Other 1
Yes White 1
NA White 1
NA NA 6

Experiment S1

demographics %>%
  filter(experiment_name=="Experiment S1") %>%
  kable()
age_group experiment_name N gender_f mean_age sd_age min_age max_age native_english languages_besides_english monolingual
adults Experiment S1 62 27 19.13 1.01 18 22 56 NA NA

Summarizing data by participant

First, we summarize sampling and test behavior by participant and store these objects for later plotting and analysis.

Sampling

#Summarize sampling and test behavior by subject
#used in later analyses and plotting

#selections by subject
subj_selection <- d %>%
  filter(trialType=="selection") %>%
  group_by(experiment_name,ambiguity_condition,subject) %>%
  summarize(
    N=n(),
    prop_ambig_selection=sum(selectionType!="low")/N,
    num_ambig_selection=sum(selectionType!="low"))

subj_selection %>%
  arrange(experiment_name,ambiguity_condition,subject) %>%
  DT::datatable()

Overall Test

#test performance by subject
subj_test <- d %>%
  filter(trialType=="test") %>%
  group_by(experiment_name,ambiguity_condition,subject) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_test %>%
  arrange(experiment_name,ambiguity_condition,subject) %>%
  DT::datatable()

Test By Item

#test performance split by item type (ambiguous vs. non-ambiguous)
subj_test_item <- d %>%
  filter(trialType=="test") %>%
  group_by(experiment_name,ambiguity_condition,subject,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
subj_test_item %>%
  arrange(experiment_name,ambiguity_condition,subject) %>%
  DT::datatable()

Experiment 1

Sampling

Analysis of learners’ sampling preferences.

Plot

Plot is based on estimates from the logistic mixed-effects model

#create data frame with model predictions
m <- glmer(isAmbiguous~(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
model_pred <- data.frame(ambiguity_condition="ambiguous")
pY <- predictSE(m,model_pred, type="response")
model_pred <- model_pred %>%
  mutate(
    experiment_name= "Experiment 1",
    prop_ambiguous = pY$fit,
    prop_ambiguous_lower_ci = pY$fit - 1.96*pY$se.fit,
    prop_ambiguous_upper_ci = pY$fit + 1.96*pY$se.fit)

#create plot
p_exp1_sampling <- ggplot(subset(model_pred,ambiguity_condition=="ambiguous"),aes(x=experiment_name,y=prop_ambiguous,color=ambiguity_condition,fill=ambiguity_condition))+
  geom_bar(stat="identity",size=2.5,fill="white",width=0.5)+
  geom_dotplot(data=subset(subj_selection,experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous"), aes(y=prop_ambig_selection),binaxis="y",stackdir="center",alpha=0.5,dotsize=0.6)+
  geom_errorbar(aes(ymin=prop_ambiguous_lower_ci,ymax=prop_ambiguous_upper_ci),width=0,size=1.2)+
  ylab("Probability of \nAmbiguous Selection")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  theme_classic(base_size=24)+
  theme(legend.position="none")+
  scale_x_discrete(name="Experiment 1")+
  theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
p_exp1_sampling

Descriptives

#descriptives
subj_summary_1 <-  subj_selection %>%
  filter(experiment_name=="Experiment 1") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_1)
ambiguity_condition N prop_ambiguous prop_ambiguous_lower_ci prop_ambiguous_upper_ci
ambiguous 28 0.625 0.5084928 0.7415072

Logistic mixed-effects model

m <- glmer(isAmbiguous~(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 1" & trialType == "selection" &  
##     ambiguity_condition == "ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    150.9    159.1    -72.5    144.9      109 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4998 -0.9396  0.5155  0.6668  1.0642 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.845    0.9193  
##  choiceImage (Intercept) 0.000    0.0000  
## Number of obs: 112, groups:  subject, 28; choiceImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.6155     0.2845   2.163   0.0305 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(m,method="Wald")[3,]
##      2.5 %     97.5 % 
## 0.05777627 1.17315329

Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of a by-item random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.

Non-parametric test (Wilcoxon)

wilcox.test(filter(subj_selection, experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 1" & ambiguity_condition ==     "ambiguous")$prop_ambig_selection
## V = 151.5, p-value = 0.02026
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000775 0.8750528
## sample estimates:
## (pseudo)median 
##       0.625024

Test

Analysis of learners’ test performance.

Plot

## split by item, within-subjects corrected CIs
subj_summary_test_item_1 <-  summarySEwithin(
  filter(
    subj_test_item,
    experiment_name=="Experiment 1"),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) %>%
  select(-accuracy_norm,-sd,-se,-ci)
## by-item test plot
ggplot(subj_summary_test_item_1,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_point(data=filter(subj_test_item,experiment_name=="Experiment 1"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","fully\nambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))

Descriptives

Overall Test Accuracy
## descriptives
subj_summary_test_1 <-  subj_test %>%
  filter(experiment_name=="Experiment 1") %>%
  group_by(experiment_name,ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_correct=mean(accuracy,na.rm=T),
    ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
    prop_correct_lower_ci=prop_correct-ci,
    prop_correct_upper_ci=prop_correct+ci
  ) %>%
  select(-ci)
kable(subj_summary_test_1)
experiment_name ambiguity_condition N prop_correct prop_correct_lower_ci prop_correct_upper_ci
Experiment 1 ambiguous 28 0.65625 0.5265768 0.7859232
Test Accuracy Split by Item
kable(subj_summary_test_item_1)
experiment_name ambiguity_condition targetIsAmbiguousYN N accuracy lower_ci upper_ci
Experiment 1 ambiguous no 28 0.6785714 0.5909601 0.7661827
Experiment 1 ambiguous yes 28 0.6339286 0.5463173 0.7215399

Logistic mixed-effects model

Overall Test Accuracy
## testing overall accuracy against chance
d$offset.125 <- 1/8
m <- glmer(isRight~offset(logit(offset.125))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 1"&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## isRight ~ offset(logit(offset.125)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 1" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    236.0    246.3   -115.0    230.0      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1422 -0.4954  0.2084  0.5398  2.0369 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 5.30551  2.3034  
##  targetImage (Intercept) 0.09789  0.3129  
## Number of obs: 224, groups:  subject, 28; targetImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.1893     0.5097   6.258 3.91e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Testing Difference between Items
## testing difference between items
d$targetIsAmbiguousC <- ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==1,0.5,
                                        ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==0,-0.5,NA))
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment 1")&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment 1") & trialType ==  
##     "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    233.5    253.9   -110.7    221.5      218 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0032 -0.4774  0.0571  0.3672  1.7067 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr 
##  subject     (Intercept)        9.14612  3.024         
##              targetIsAmbiguousC 8.80532  2.967    -0.98
##  targetImage (Intercept)        0.01905  0.138         
## Number of obs: 224, groups:  subject, 28; targetImage, 8
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          1.6280     0.7177   2.268   0.0233 *
## targetIsAmbiguousC  -1.5652     0.9516  -1.645   0.1000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC -0.823
confint(m, method="Wald")[5:6,]
##                         2.5 %    97.5 %
## (Intercept)         0.2213282 3.0345876
## targetIsAmbiguousC -3.4303089 0.2999958

Relationship between sampling and test accuracy

#join sampling and test
subj_selection <- subj_selection %>%
  left_join(subj_test,by=c("subject","experiment_name","ambiguity_condition"))

#correlation between preference for sampling ambiguous items and test performance
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==     "Experiment 1")$accuracy and     "Experiment 1")$prop_ambig_selection
## t = 3.6465, df = 26, p-value = 0.001167
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2664476 0.7845213
## sample estimates:
##       cor 
## 0.5817003

Plot correlation between sampling preference and test accuracy

p_exp1_sampling_test <- ggplot(filter(subj_selection,experiment_name=="Experiment 1"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")
p_exp1_sampling_test

Experiment 2

Sampling

Analysis of learners’ sampling preferences.

Plot

ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
  scale_fill_brewer(palette="Set1",direction=-1)+
  scale_color_brewer(palette="Set1",direction=-1)+
  geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
  geom_vline(xintercept=4/3,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16),
        legend.position="none")+
  ylab("Number of subjects")+
  xlab("Number of ambiguous selections")+
  #geom_density(aes(group=1, y=..count../1.3))+
  scale_x_continuous(breaks=c(0,1,2,3,4), limits=c(-0.5,4))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30), limits=c(0,20))

Descriptives

#descriptives
subj_summary_2 <-  subj_selection %>%
  filter(experiment_name=="Experiment 2") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_2)
ambiguity_condition N prop_ambiguous prop_ambiguous_lower_ci prop_ambiguous_upper_ci
ambiguous 38 0.3289474 0.2712987 0.386596

Logistic mixed-effects model

d$offset.33 <- 1/3
m <- glmer(isAmbiguous~offset(logit(offset.33))+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ offset(logit(offset.33)) + (1 | subject) + (1 |  
##     choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    198.6    207.6    -96.3    192.6      149 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7001 -0.7001 -0.7001  1.4283  1.4283 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0        0       
##  choiceImage (Intercept) 0        0       
## Number of obs: 152, groups:  subject, 38; choiceImage, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.0198     0.1726  -0.115    0.909
confint(m, method="Wald")[3,]
##      2.5 %     97.5 % 
## -0.3581671  0.3185617

Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of a by-item random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.

Non-parametric test (Wilcoxon)

wilcox.test(filter(subj_selection,experiment_name=="Experiment 2")$prop_ambig_selection,mu=1/3, conf.int=T, conf.level=0.95)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 2")$prop_ambig_selection
## V = 409, p-value = 0.5734
## alternative hypothesis: true location is not equal to 0.3333333
## 95 percent confidence interval:
##  0.2500390 0.3750141
## sample estimates:
## (pseudo)median 
##      0.3749301

Relationship with Age

#predict ambiguous selections from age
m <- glmer(isAmbiguous~Age+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    200.6    212.7    -96.3    192.6      148 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7034 -0.7006 -0.6989  1.4260  1.4334 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0        0       
##  choiceImage (Intercept) 0        0       
## Number of obs: 152, groups:  subject, 38; choiceImage, 6
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.737329   0.881389  -0.837    0.403
## Age          0.004131   0.146437   0.028    0.977
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.981
confint(m, method="Wald")[3:4,]
##                  2.5 %    97.5 %
## (Intercept) -2.4648196 0.9901613
## Age         -0.2828792 0.2911415
#plot model predictions
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=T),max(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=T),by=0.1))
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
  geom_violinh(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=F)+
  geom_jitter(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
  geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
  geom_hline(yintercept=1/3,linetype="dotted")+
  theme_classic(base_size=16)+
  xlab("Age (in years)")+
  scale_x_continuous(breaks=c(3,4,5,6,7,8))+
  ylab("Proportion of ambiguous selections")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))

Test

Plot

#summarize across participants by item type
subj_summary_item_2 <- summarySEwithin(
  filter(
    subj_test_item,
    (experiment_name=="Experiment 2")),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) %>%
  select(-accuracy_norm,-sd,-se,-ci)
## create plot
ggplot(subj_summary_item_2,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_dotplot(data=filter(subj_test_item,experiment_name=="Experiment 2"),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/6,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))

Descriptives

Overall Test Accuracy
##descriptives
#overall
subj_summary_test_2 <-  subj_test %>%
  filter(experiment_name=="Experiment 2") %>%
  group_by(experiment_name,ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_correct=mean(accuracy,na.rm=T),
    ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
    prop_correct_lower_ci=prop_correct-ci,
    prop_correct_upper_ci=prop_correct+ci
  ) %>%
  select(-ci)
kable(subj_summary_test_2)
experiment_name ambiguity_condition N prop_correct prop_correct_lower_ci prop_correct_upper_ci
Experiment 2 ambiguous 38 0.3859649 0.3073429 0.4645869
Test Accuracy Split by Item
#by item type
subj_summary_item_2 %>%
  kable()
experiment_name ambiguity_condition targetIsAmbiguousYN N accuracy lower_ci upper_ci
Experiment 2 ambiguous no 38 0.3355263 0.2484583 0.4225943
Experiment 2 ambiguous yes 38 0.4868421 0.3997741 0.5739101

Logistic Mixed-Effects Model

Overall Test Accuracy
## testing overall accuracy against chance
d$offset.17 <- 1/6
m <- glmer(isRight~offset(logit(offset.17))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 2"&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ offset(logit(offset.17)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 2" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    306.6    316.9   -150.3    300.6      224 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0598 -0.7565 -0.6739  1.0550  1.4839 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.3354   0.5791  
##  targetImage (Intercept) 0.0000   0.0000  
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1131     0.1723   6.461 1.04e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test Accuracy by Item Type
## accuracy by item type
##logistic mixed=effects model
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 2"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    307.5    328.0   -147.7    295.5      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3639 -0.7441 -0.5889  1.0369  1.6980 
## 
## Random effects:
##  Groups      Name               Variance  Std.Dev. Corr
##  subject     (Intercept)        0.3840684 0.61973      
##              targetIsAmbiguousC 0.0001614 0.01271  1.00
##  targetImage (Intercept)        0.0000000 0.00000      
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -0.3970     0.1824  -2.176   0.0296 *
## targetIsAmbiguousC   0.6795     0.3046   2.231   0.0257 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC 0.209
confint(m, method="Wald")[5:6,]
##                          2.5 %      97.5 %
## (Intercept)        -0.75450961 -0.03940061
## targetIsAmbiguousC  0.08253043  1.27644793

Ambiguous Item Choices

Which items (ambiguous vs. disambiguated) do participants choose on each test trial type? Here, we asked what proportion of the time participants select one of the two ambiguous items when the target label is for an ambiguous vs. a disambiguated item (regardless of accuracy).

## by choice type
# Which items (ambiguous vs. disambiguated) do participants choose on each test trial type?
d$testChoiceType <- ifelse(d$trialType=="test"&(as.character(d$choiceImage)==d$High1|as.character(d$choiceImage)==d$High2),"ambiguous",
                           ifelse(d$trialType=="test","disambiguated",NA))

#summarize choice tendency by participant
subj_test_choiceType_2 <- d %>%
  filter(trialType=="test"&experiment_name  == "Experiment 2") %>%
  group_by(subject,experiment_name,ambiguity_condition,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    ambiguous_choice=mean(testChoiceType=="ambiguous",na.rm=T))

#summarize across participants
subj_summary_choiceType_2 <- summarySEwithin(
  subj_test_choiceType_2,
  "ambiguous_choice",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = ambiguous_choice - ci,
    upper_ci = ambiguous_choice + ci
  ) %>%
  select(-ambiguous_choice_norm,-sd,-se,-ci)
kable(subj_summary_choiceType_2)
experiment_name ambiguity_condition targetIsAmbiguousYN N ambiguous_choice lower_ci upper_ci
Experiment 2 ambiguous no 38 0.1842105 0.0798448 0.2885762
Experiment 2 ambiguous yes 38 0.6184211 0.5140554 0.7227867
##plot
ggplot(subj_summary_choiceType_2,aes(x=targetIsAmbiguousYN,y=ambiguous_choice,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_dotplot(data=subj_test_choiceType_2,alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","ambiguous"))+
  ylab("Proportion Ambiguous Items Selected")+
  geom_hline(yintercept=1/3,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))

Sampling and test accuracy

Overall Accuracy for Sampled vs. Not Sampled Items
#summarize test accuracy by choice
subj_test_choice_2 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 2")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T)) 

subj_summary_choice_2 <- summarySEwithin(
  subj_test_choice_2,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )
subj_summary_choice_2 %>%
  select(-accuracy_norm, -sd,-se,-ci) %>%
  kable()
experiment_name ambiguity_condition chosen N accuracy lower_ci upper_ci chosen_factor
Experiment 2 ambiguous 0 38 0.2675439 0.1621150 0.3729727 NOT SAMPLED
Experiment 2 ambiguous 1 38 0.4517544 0.3463255 0.5571833 SAMPLED
Plot by Item Type
subj_test_item_choice_2 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 2")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))

subj_summary_item_choice_2 <- summarySEwithin(
  subj_test_item_choice_2,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen","targetIsAmbiguousYN"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )

ggplot(subj_summary_item_choice_2,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("disambiguated","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))+
  facet_wrap(~chosen_factor)

Logistic Mixed-Effects Model
d$chosenC <- ifelse(!is.na(d$chosen)&d$chosen==0,-0.5,
            ifelse(!is.na(d$chosen)&d$chosen==1,0.5,NA))
m <- glmer(isRight~targetIsAmbiguousC*chosenC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 2"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    302.2    329.6   -143.1    286.2      219 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5720 -0.7516 -0.4534  0.9339  2.4960 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr 
##  subject     (Intercept)        0.44890  0.6700        
##              targetIsAmbiguousC 0.03932  0.1983   -1.00
##  targetImage (Intercept)        0.00000  0.0000        
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                 -0.5662     0.2014  -2.811  0.00494 **
## targetIsAmbiguousC           0.8276     0.3441   2.405  0.01618 * 
## chosenC                      0.8896     0.3349   2.656  0.00790 **
## targetIsAmbiguousC:chosenC  -0.4927     0.6997  -0.704  0.48135   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC chosnC
## trgtIsAmbgC  0.048              
## chosenC     -0.330  0.037       
## trgtIsAmC:C  0.042 -0.412  0.158
confint(m, method="Wald")[5:8,]
##                                 2.5 %    97.5 %
## (Intercept)                -0.9610699 -0.171419
## targetIsAmbiguousC          0.1530674  1.502058
## chosenC                     0.2332100  1.545923
## targetIsAmbiguousC:chosenC -1.8640073  0.878679
Correlation between overall sampling preference and test accuracy
## preference for sampling ambiguous items and test performance
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 2")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 2")$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==     "Experiment 2")$accuracy and     "Experiment 2")$prop_ambig_selection
## t = 1.3557, df = 36, p-value = 0.1836
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1068138  0.5045310
## sample estimates:
##       cor 
## 0.2203953
##plot
ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")

Experiment 3

Sampling

Analysis of learners’ sampling preferences.

Plot

ggplot(filter(subj_selection,experiment_name=="Experiment 3"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
  scale_fill_brewer(palette="Set1",direction=-1)+
  scale_color_brewer(palette="Set1",direction=-1)+
  geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16),
        legend.position="none")+
  ylab("Number of subjects")+
  xlab("Number of ambiguous selections")+
  scale_x_continuous(breaks=c(0,1,2))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30))

Descriptives

#descriptives
subj_summary_3 <-  subj_selection %>%
  filter(experiment_name=="Experiment 3") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_3)
ambiguity_condition N prop_ambiguous prop_ambiguous_lower_ci prop_ambiguous_upper_ci
ambiguous_me 56 0.6339286 0.5436849 0.7241723

Logistic mixed-effects model

m=glmer(isAmbiguous ~ 1+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ 1 + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    153.1    161.3    -73.6    147.1      109 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3189 -1.3126  0.7582  0.7601  0.7618 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  subject     (Intercept) 2.663e-15 5.160e-08
##  choiceImage (Intercept) 1.529e-03 3.911e-02
## Number of obs: 112, groups:  subject, 56; choiceImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.5498     0.2031   2.707  0.00679 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(m,method="Wald")[3,]
##     2.5 %    97.5 % 
## 0.1517166 0.9479760

Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of a by-item random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.

Non-parametric test (Wilcoxon)

wilcox.test(filter(subj_selection,experiment_name=="Experiment 3")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 3")$prop_ambig_selection
## V = 330, p-value = 0.005553
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000378 1.0000000
## sample estimates:
## (pseudo)median 
##      0.9999326

Relationship with Age

#predict ambiguous selections from age
m <- glmer(isAmbiguous~Age+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## 
##      AIC      BIC   logLik deviance df.resid 
##    148.4    159.3    -70.2    140.4      108 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2833 -1.0456  0.5800  0.7368  1.2408 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev.
##  subject     (Intercept) 1.081e-08 0.000104
##  choiceImage (Intercept) 2.176e-03 0.046648
## Number of obs: 112, groups:  subject, 56; choiceImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -1.9284     1.0046  -1.919   0.0549 .
## Age           0.4551     0.1837   2.478   0.0132 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.978
confint(m, method="Wald")[3:4,]
##                   2.5 %     97.5 %
## (Intercept) -3.89741195 0.04068293
## Age          0.09510293 0.81512510
#plot model predictions
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 3"&trialType=="selection")$Age,na.rm=T),max(subset(d,experiment_name=="Experiment 3"&trialType=="selection")$Age,na.rm=T),by=0.1))
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
  geom_violinh(data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=F)+
  geom_jitter(data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
  geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  theme_classic(base_size=16)+
  xlab("Age (in years)")+
  scale_x_continuous(breaks=c(3,4,5,6,7,8))+
  ylab("Proportion of ambiguous selections")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))

Test

Analysis of learners’ test accuracy.

Plot

##plot by item type
#by item type
subj_summary_item_3 <- summarySEwithin(
  filter(
    subj_test_item,
    (experiment_name=="Experiment 3")),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  )

ggplot(subj_summary_item_3,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_dotplot(data=filter(subj_test_item,experiment_name=="Experiment 3"),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("mutual exclusivity","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("mutual exclusivity","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("mutual exclusivity","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))

Descriptives

Overall Test Accuracy
## descriptives
subj_summary_test_3 <-  subj_test %>%
  filter(experiment_name=="Experiment 3") %>%
  group_by(experiment_name,ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_correct=mean(accuracy,na.rm=T),
    ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
    prop_correct_lower_ci=prop_correct-ci,
    prop_correct_upper_ci=prop_correct+ci)
subj_summary_test_3 %>%
  select(-ci) %>%
  kable()
experiment_name ambiguity_condition N prop_correct prop_correct_lower_ci prop_correct_upper_ci
Experiment 3 ambiguous_me 56 0.5758929 0.4839003 0.6678855
Test Accuracy Split by Item
subj_summary_item_3 %>%
  select(-accuracy_norm,-se,-sd,-ci) %>%
  kable()
experiment_name ambiguity_condition targetIsAmbiguousYN N accuracy lower_ci upper_ci
Experiment 3 ambiguous_me no 56 0.6160714 0.5280317 0.7041111
Experiment 3 ambiguous_me yes 56 0.5357143 0.4476746 0.6237540

Logistic Mixed-Effects Model

Overall Test Accuracy
##logistic mixed-effects model
d$offset.25 <- 1/4
#overall
m <- glmer(isRight~offset(logit(offset.25))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 3"&trialType=="test"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ offset(logit(offset.25)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    289.9    300.2   -142.0    283.9      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6324 -0.6345  0.3944  0.6126  1.2762 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 2.17173  1.4737  
##  targetImage (Intercept) 0.09708  0.3116  
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.5272     0.3007   5.079 3.79e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test Accuracy by Item Type
d$offset.25 <- 1/4
##logistic mixed-effects model
#by item type
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 3"&trialType=="test"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    293.5    314.0   -140.8    281.5      218 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7428 -0.6127  0.3710  0.5865  1.3274 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr
##  subject     (Intercept)        2.5206   1.5876       
##              targetIsAmbiguousC 0.7476   0.8646   0.39
##  targetImage (Intercept)        0.1001   0.3164       
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)          0.4427     0.3168   1.397    0.162
## targetIsAmbiguousC  -0.4392     0.3615  -1.215    0.224
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC 0.064
confint(m,method="Wald")[5:6,]
##                         2.5 %    97.5 %
## (Intercept)        -0.1781934 1.0636353
## targetIsAmbiguousC -1.1478162 0.2693513

Sampling and test accuracy

Overall Accuracy for Sampled vs. Not Sampled Items
#summarize test accuracy by choice
subj_test_choice_3 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 3")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))

subj_summary_choice_3 <- summarySEwithin(
  subj_test_choice_3,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )
subj_summary_choice_3 %>%
  select(-accuracy_norm, -sd,-se,-ci) %>%
  kable()
experiment_name ambiguity_condition chosen N accuracy lower_ci upper_ci chosen_factor
Experiment 3 ambiguous_me 0 56 0.5595238 0.4653227 0.6537249 NOT SAMPLED
Experiment 3 ambiguous_me 1 56 0.5982143 0.5040132 0.6924154 SAMPLED
Plot by Item Type
subj_test_item_choice_3 <- d %>%
  filter(trialType=="test"&experiment_name  %in% c("Experiment 3")) %>%
  group_by(subject,experiment_name,ambiguity_condition,chosen,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))

subj_summary_item_choice_3 <- summarySEwithin(
  subj_test_item_choice_3,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("chosen","targetIsAmbiguousYN"),
  idvar="subject",
  na.rm=T) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
  )

ggplot(subj_summary_item_choice_3,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("mutual exclusivity","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("mutual exclusivity","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type",
                   limits=c("no","yes"),
                   labels=c("mutual exclusivity","ambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
  theme_classic(base_size=16)+
  theme(legend.position="none", axis.text.x=element_text(size=15))+
  facet_wrap(~chosen_factor)

Logistic Mixed-Effects Model
m <- glmer(isRight~targetIsAmbiguousC*chosenC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 3"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 3")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    296.5    323.8   -140.3    280.5      216 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9202 -0.6319  0.3743  0.5870  1.5328 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr
##  subject     (Intercept)        2.47115  1.5720       
##              targetIsAmbiguousC 0.52951  0.7277   0.38
##  targetImage (Intercept)        0.09831  0.3135       
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                  0.4250     0.3188   1.333    0.182
## targetIsAmbiguousC          -0.5291     0.3671  -1.441    0.150
## chosenC                      0.3414     0.3474   0.983    0.326
## targetIsAmbiguousC:chosenC   0.2390     0.8039   0.297    0.766
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC chosnC
## trgtIsAmbgC  0.045              
## chosenC      0.001 -0.255       
## trgtIsAmC:C -0.164  0.003  0.031
confint(m, method="Wald")[5:8,]
##                                 2.5 %    97.5 %
## (Intercept)                -0.1997402 1.0497618
## targetIsAmbiguousC         -1.2487101 0.1904759
## chosenC                    -0.3395635 1.0223851
## targetIsAmbiguousC:chosenC -1.3365340 1.8145833
Correlation between overall sampling preference and test accuracy
##relationship between sampling preference for ambiguous items and test accuracy
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous_me"&experiment_name=="Experiment 3")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous_me"&experiment_name=="Experiment 3")$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous_me" &  and subset(subj_selection, ambiguity_condition == "ambiguous_me" &     experiment_name == "Experiment 3")$accuracy and     experiment_name == "Experiment 3")$prop_ambig_selection
## t = 0.49839, df = 54, p-value = 0.6202
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1987701  0.3247892
## sample estimates:
##        cor 
## 0.06766648
##plot
ggplot(filter(subj_selection,experiment_name=="Experiment 3"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous_me"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")

Experiment S1

Sampling

Analysis of learners’ sampling preferences.

Plot

## Plot
d$conditionFull <- ifelse(d$ambiguity_condition=="ambiguous",0,
                          ifelse(d$ambiguity_condition=="partially ambiguous",-1,NA))
m <- glmer(isAmbiguous~conditionFull+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
model_pred <- data.frame(conditionFull=c(-1,0),ambiguity_condition=c("partially ambiguous","ambiguous"))
pY <- predictSE(m,model_pred, type="response")
model_pred <- model_pred %>%
  mutate(
    prop_ambiguous = pY$fit,
    prop_ambiguous_lower_ci = pY$fit - 1.96*pY$se.fit,
    prop_ambiguous_upper_ci = pY$fit + 1.96*pY$se.fit)
p_expS1_sampling <- ggplot(model_pred,aes(x=ambiguity_condition,y=prop_ambiguous,color=ambiguity_condition,fill=ambiguity_condition))+
  geom_bar(stat="identity",size=2.5,fill="white",width=0.5)+
  geom_dotplot(data=subset(subj_selection,experiment_name=="Experiment S1"), aes(y=prop_ambig_selection),binaxis="y",stackdir="center",alpha=0.5,dotsize=0.6)+
  geom_errorbar(aes(ymin=prop_ambiguous_lower_ci,ymax=prop_ambiguous_upper_ci),width=0,size=1.2)+
  ylab("Probability of \nAmbiguous Selection")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  theme_classic(base_size=18)+
  theme(legend.position="none")+
  scale_x_discrete(name="Condition")
p_expS1_sampling

Descriptives

#descriptives
subj_summary_s1 <-  subj_selection %>%
  filter(experiment_name=="Experiment S1") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N=n(),
    prop_ambiguous=mean(prop_ambig_selection),
    ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
    prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
    prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_s1)
ambiguity_condition N prop_ambiguous prop_ambiguous_lower_ci prop_ambiguous_upper_ci
ambiguous 28 0.6428571 0.5359183 0.7497960
partially ambiguous 34 0.4779412 0.3910626 0.5648197

Logistic mixed-effects model

Condition Comparison

Estimating the difference in preference for selecting ambiguous items in the Partially Ambiguous condition vs. the Fully Ambiguous condition.

d$conditionC <- ifelse(d$ambiguity_condition=="ambiguous",0.5, 
                       ifelse(d$ambiguity_condition=="partially ambiguous",-0.5,NA))
m <- glmer(isAmbiguous~conditionC+(1|subject)+(1|choiceImage),data=subset(d,(experiment_name=="Experiment S1")&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionC + (1 | subject) + (1 | choiceImage)
##    Data: subset(d, (experiment_name == "Experiment S1") & trialType ==  
##     "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  subject     (Intercept) 1.151e-01 3.393e-01
##  choiceImage (Intercept) 8.752e-10 2.958e-05
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.2569     0.1401   1.833   0.0668 .
## conditionC    0.6955     0.2815   2.471   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## conditionC 0.145
confint(m,method="Wald")[3:4,]
##                   2.5 %    97.5 %
## (Intercept) -0.01777231 0.5315834
## conditionC   0.14384157 1.2472401
Fully Ambiguous Condition

Estimating the preference for (fully) ambiguous items by re-centering the model on the fully ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Fully Ambiguous condition.

## Fully Ambiguous Condition
d$conditionFull <- ifelse(d$ambiguity_condition=="ambiguous",0,
                          ifelse(d$ambiguity_condition=="partially ambiguous",-1,NA))

m <- glmer(isAmbiguous~conditionFull+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionFull + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.1151   0.3393  
##  choiceImage (Intercept) 0.0000   0.0000  
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     0.6047     0.2125   2.845  0.00444 **
## conditionFull   0.6955     0.2815   2.471  0.01347 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## conditinFll 0.758
confint(m,method="Wald")[3:4,]
##                   2.5 %   97.5 %
## (Intercept)   0.1881258 1.021231
## conditionFull 0.1438395 1.247238
Partially Ambiguous Condition

Estimating the preference for (partially) ambiguous items by re-centering the model on the partially ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Partially Ambiguous condition.

d$conditionPartial <- ifelse(d$ambiguity_condition=="ambiguous",1,
                            ifelse(d$ambiguity_condition=="partially ambiguous",0,NA))

m <- glmer(isAmbiguous~conditionPartial+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionPartial + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.1151   0.3393  
##  choiceImage (Intercept) 0.0000   0.0000  
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)      -0.09086    0.18365  -0.495   0.6208  
## conditionPartial  0.69553    0.28148   2.471   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## conditnPrtl -0.656
confint(m,method="Wald")[3:4,]
##                       2.5 %    97.5 %
## (Intercept)      -0.4508106 0.2690974
## conditionPartial  0.1438339 1.2472347

Non-parametric test (Wilcoxon)

Condition Comparison
##condition comparison
wilcox.test(subset(subj_selection , ambiguity_condition=="ambiguous"&experiment_name=="Experiment S1")$prop_ambig_selection,
            subset(subj_selection, ambiguity_condition=="partially ambiguous"&experiment_name=="Experiment S1")$prop_ambig_selection,
            conf.int=T, conf.level=0.95)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "partially ambiguous" &     "Experiment S1")$prop_ambig_selection and     experiment_name == "Experiment S1")$prop_ambig_selection
## W = 631.5, p-value = 0.02256
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  2.599459e-05 2.500091e-01
## sample estimates:
## difference in location 
##              0.2499557
Fully Ambiguous Condition
##Fully Ambiguous Condition
wilcox.test(subset(subj_selection, experiment_name=="Experiment S1"&ambiguity_condition=="ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition ==     "ambiguous")$prop_ambig_selection
## V = 140, p-value = 0.01534
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000486 0.8750296
## sample estimates:
## (pseudo)median 
##       0.750061
Partially Ambiguous Condition
##Partially Ambiguous Condition
wilcox.test(subset(subj_selection, experiment_name=="Experiment S1"&ambiguity_condition=="partially ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition ==     "partially ambiguous")$prop_ambig_selection
## V = 100.5, p-value = 0.5901
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.2500428 0.6249926
## sample estimates:
## (pseudo)median 
##      0.4999225

Test

Analysis of learners’ test performance.

##summarize participant accuracy by test half
subj_test_half <- d %>%
  filter(trialType=="test"&experiment_name=="Experiment S1") %>%
  group_by(subject,experiment_name,ambiguity_condition, testHalf) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))
##by item and test half
subj_test_half_item <- d %>%
  filter(trialType=="test"&experiment_name=="Experiment S1") %>%
  group_by(subject,experiment_name,ambiguity_condition, testHalf,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
  summarize(
    N=n(),
    accuracy=mean(isRight,na.rm=T))

Plot

#item summary within-subjects corrected CIs
subj_summary_item_testhalf_correctedCIs_s1 <- summarySEwithin(
  filter(
    subj_test_half_item,
    (experiment_name=="Experiment S1")),
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("testHalf","targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block",testHalf,sep=" ")
  )

#Experiment S1 - test
p_expS1_fulltest <- ggplot(filter(subj_summary_item_testhalf_correctedCIs_s1,ambiguity_condition=="ambiguous"),aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_point(data=filter(subj_test_half_item,experiment_name=="Experiment S1"&ambiguity_condition=="ambiguous"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","ambiguous"),
                     values=c("#4DAF4A","#E41A1C"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","ambiguous"),
                    values=c("#4DAF4A","#E41A1C"))+
  scale_x_discrete(name = "Item Type\n\nFully Ambiguous Condition",
                   limits=c("no","yes"),
                   labels=c("disambig-\nuated","fully\nambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
  theme_classic(base_size=12)+
  theme(legend.position="none", axis.text.x=element_text(size=9))+
  facet_wrap(~testHalf_name)

p_expS1_partialtest <- ggplot(filter(subj_summary_item_testhalf_correctedCIs_s1, ambiguity_condition=="partially ambiguous"),aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
  geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
  geom_point(data=filter(subj_test_half_item,experiment_name=="Experiment S1"&ambiguity_condition=="partially ambiguous"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
  scale_color_manual(name="Item Type",
                     limits=c("no","yes"),
                     labels=c("disambiguated","partially\nambiguous"),
                     values=c("#4DAF4A","#377EB8"))+
  scale_fill_manual(name="Item Type",
                    limits=c("no","yes"),
                    labels=c("disambiguated","partially\nambiguous"),
                    values=c("#4DAF4A","#377EB8"))+
  scale_x_discrete(name = "Item Type\n\nPartially Ambiguous Condition",
                   limits=c("no","yes"),
                   labels=c("disambig-\nuated","partially\nambiguous"))+
  ylab("Test Accuracy")+
  geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
  theme_classic(base_size=12)+
  theme(legend.position="none", axis.text.x=element_text(size=9))+
  facet_wrap(~testHalf_name)
plot_grid(p_expS1_fulltest,p_expS1_partialtest, labels=c("A","B"),rel_widths=c(1,1),label_size=24,nrow=1)

Descriptives

Overall Test Accuracy

Split by test half.

## descriptives
subj_summary_test_s1 <-  summarySEwithin(
  subj_test_half,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("testHalf"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block",testHalf,sep=" ")
  ) %>%
  select(-accuracy_norm,-se,-sd,-ci)
  
kable(subj_summary_test_s1)
experiment_name ambiguity_condition testHalf N accuracy lower_ci upper_ci testHalf_name
Experiment S1 ambiguous 1 28 0.6517857 0.6118389 0.6917325 Test Block 1
Experiment S1 ambiguous 2 28 0.7276786 0.6877318 0.7676254 Test Block 2
Experiment S1 partially ambiguous 1 34 0.7683824 0.7248067 0.8119580 Test Block 1
Experiment S1 partially ambiguous 2 34 0.7757353 0.7321596 0.8193110 Test Block 2
Test Accuracy Split by Item

Split by test half.

## split by item
subj_summary_test_item_s1 <-  summarySEwithin(
  subj_test_half_item,
  "accuracy",
  betweenvars=c("experiment_name","ambiguity_condition"),
  withinvars=c("testHalf","targetIsAmbiguousYN"),
  idvar="subject") %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block",testHalf,sep=" ")
  ) %>%
  select(-accuracy_norm,-se,-sd,-ci)

kable(subj_summary_test_item_s1)
experiment_name ambiguity_condition testHalf targetIsAmbiguousYN N accuracy lower_ci upper_ci testHalf_name
Experiment S1 ambiguous 1 no 28 0.8392857 0.7786705 0.8999009 Test Block 1
Experiment S1 ambiguous 1 yes 28 0.4642857 0.3776673 0.5509041 Test Block 1
Experiment S1 ambiguous 2 no 28 0.8571429 0.7783393 0.9359464 Test Block 2
Experiment S1 ambiguous 2 yes 28 0.5982143 0.5156617 0.6807669 Test Block 2
Experiment S1 partially ambiguous 1 no 34 0.7500000 0.6896160 0.8103840 Test Block 1
Experiment S1 partially ambiguous 1 yes 34 0.7867647 0.7272860 0.8462434 Test Block 1
Experiment S1 partially ambiguous 2 no 34 0.7941176 0.7390598 0.8491754 Test Block 2
Experiment S1 partially ambiguous 2 yes 34 0.7573529 0.6839887 0.8307171 Test Block 2

Logistic mixed-effects model

Three-Way Interaction: Effects of Item Type and Test Half across Conditions
d$testHalfC <- ifelse(!is.na(d$testHalf) & d$testHalf==2,0.5,
                      ifelse(!is.na(d$testHalf) & d$testHalf==1,-0.5,NA))

#three-way interaction
m <- glmer(isRight~targetIsAmbiguousC*conditionC*testHalfC+(1+targetIsAmbiguousC*testHalfC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment S1")&trialType=="test"&ambiguity_condition!="non ambiguous"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * conditionC * testHalfC + (1 +  
##     targetIsAmbiguousC * testHalfC | subject) + (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment S1") & trialType ==  
##     "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    846.0    939.1   -404.0    808.0      973 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0148 -0.2249  0.1640  0.3500  4.4299 
## 
## Random effects:
##  Groups      Name                         Variance Std.Dev. Corr          
##  subject     (Intercept)                  5.4970   2.3446                 
##              targetIsAmbiguousC           1.0299   1.0148   0.02          
##              testHalfC                    0.7031   0.8385   0.99 0.19     
##              targetIsAmbiguousC:testHalfC 0.5055   0.7110   0.35 0.94 0.50
##  targetImage (Intercept)                  0.1513   0.3889                 
## Number of obs: 992, groups:  subject, 62; targetImage, 8
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                               2.0641     0.3900   5.292 1.21e-07
## targetIsAmbiguousC                       -1.3301     0.3950  -3.368 0.000758
## conditionC                               -0.7664     0.6784  -1.130 0.258643
## testHalfC                                 0.8217     0.3614   2.273 0.023007
## targetIsAmbiguousC:conditionC            -2.7515     0.5694  -4.832 1.35e-06
## targetIsAmbiguousC:testHalfC             -0.1361     0.6891  -0.197 0.843444
## conditionC:testHalfC                      0.1909     0.5104   0.374 0.708427
## targetIsAmbiguousC:conditionC:testHalfC   0.7522     0.9304   0.808 0.418824
##                                            
## (Intercept)                             ***
## targetIsAmbiguousC                      ***
## conditionC                                 
## testHalfC                               *  
## targetIsAmbiguousC:conditionC           ***
## targetIsAmbiguousC:testHalfC               
## conditionC:testHalfC                       
## targetIsAmbiguousC:conditionC:testHalfC    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC cndtnC tstHlC trIAC:C tIAC:H cnC:HC
## trgtIsAmbgC -0.174                                           
## conditionC   0.028 -0.129                                    
## testHalfC    0.447 -0.127  0.000                             
## trgtIsAmC:C -0.108 -0.019 -0.003 -0.067                      
## trgtIsAC:HC -0.031  0.418 -0.020 -0.306 -0.038               
## cndtnC:tsHC -0.014 -0.097  0.476 -0.077  0.034  -0.236       
## trgIAC:C:HC -0.039 -0.045  0.059 -0.247  0.260  -0.093  0.086
confint(m, method="Wald")[12:19,]
##                                              2.5 %     97.5 %
## (Intercept)                              1.2996085  2.8285636
## targetIsAmbiguousC                      -2.1042260 -0.5560018
## conditionC                              -2.0960427  0.5633383
## testHalfC                                0.1132606  1.5301136
## targetIsAmbiguousC:conditionC           -3.8675112 -1.6355179
## targetIsAmbiguousC:testHalfC            -1.4867027  1.2145210
## conditionC:testHalfC                    -0.8095147  1.1912735
## targetIsAmbiguousC:conditionC:testHalfC -1.0713783  2.5757980
Specifically test the increase across test halfs for ambiguous items in the Fully Ambiguous Condition
#test specifically test half increase for ambiguous items in the Fully Ambiguous condition
d$targetIsAmbiguous_ambiguous <- ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==1,0,
                                        ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==0,-1,NA))
m <- glmer(isRight~targetIsAmbiguous_ambiguous*conditionFull*testHalfC+(1+targetIsAmbiguous_ambiguous*testHalfC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment S1")&trialType=="test"&ambiguity_condition!="non ambiguous"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguous_ambiguous * conditionFull * testHalfC +  
##     (1 + targetIsAmbiguous_ambiguous * testHalfC | subject) +  
##     (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment S1") & trialType ==  
##     "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    846.0    939.1   -404.0    808.0      973 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0148 -0.2249  0.1640  0.3500  4.4299 
## 
## Random effects:
##  Groups      Name                                  Variance Std.Dev. Corr     
##  subject     (Intercept)                           5.8087   2.4101            
##              targetIsAmbiguous_ambiguous           1.0299   1.0148   0.23     
##              testHalfC                             1.1294   1.0627   0.97 0.46
##              targetIsAmbiguous_ambiguous:testHalfC 0.5055   0.7110   0.54 0.94
##  targetImage (Intercept)                           0.1513   0.3889            
##      
##      
##      
##      
##  0.73
##      
## Number of obs: 992, groups:  subject, 62; targetImage, 8
## 
## Fixed effects:
##                                                     Estimate Std. Error z value
## (Intercept)                                           0.3280     0.5258   0.624
## targetIsAmbiguous_ambiguous                          -2.7059     0.4825  -5.608
## conditionFull                                        -2.1421     0.7349  -2.915
## testHalfC                                             1.0371     0.4364   2.377
## targetIsAmbiguous_ambiguous:conditionFull            -2.7515     0.5694  -4.833
## targetIsAmbiguous_ambiguous:testHalfC                 0.2400     0.7947   0.302
## conditionFull:testHalfC                               0.5670     0.7195   0.788
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC   0.7522     0.9303   0.809
##                                                     Pr(>|z|)    
## (Intercept)                                          0.53276    
## targetIsAmbiguous_ambiguous                         2.05e-08 ***
## conditionFull                                        0.00356 ** 
## testHalfC                                            0.01747 *  
## targetIsAmbiguous_ambiguous:conditionFull           1.35e-06 ***
## targetIsAmbiguous_ambiguous:testHalfC                0.76263    
## conditionFull:testHalfC                              0.43070    
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC  0.41877    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIA_ cndtnF tstHlC trIA_:F tIA_:H cnF:HC
## trgtIsAmbg_ 0.236                                            
## conditinFll 0.640  0.123                                     
## testHalfC   0.498  0.161  0.300                              
## trgtIsAm_:F 0.181  0.575  0.384  0.073                       
## trgtIsA_:HC 0.133  0.346  0.062  0.481  0.119                
## cndtnFll:HC 0.232  0.033  0.422  0.467  0.192   0.217        
## trgIA_:F:HC 0.062  0.117  0.155  0.305  0.260   0.505  0.707
confint(m, method="Wald")[12:19,]
##                                                          2.5 %     97.5 %
## (Intercept)                                         -0.7025257  1.3584754
## targetIsAmbiguous_ambiguous                         -3.6515408 -1.7602106
## conditionFull                                       -3.5824604 -0.7017594
## testHalfC                                            0.1818284  1.8924395
## targetIsAmbiguous_ambiguous:conditionFull           -3.8674642 -1.6355665
## targetIsAmbiguous_ambiguous:testHalfC               -1.3175109  1.7975297
## conditionFull:testHalfC                             -0.8432513  1.9772172
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC -1.0711732  2.5755888

Relationship between sampling and test performance

Correlations in Test Half 1 and 2 between sampling preference and test accuracy

Fully Ambiguous Condition - Test Block 1

#correlations Experiment S1: proportion ambiguous items selected and test accuracy
subj_test_half <- subj_test_half %>% 
  left_join(select(subj_selection,subject,prop_ambig_selection)) %>%
  mutate(
    testHalf_name = paste("Test Block",testHalf,sep=" "))

#Fully Ambiguous - test block 1
cor.test(subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==1)$accuracy,subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==1)$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==  and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==     1)$accuracy and     1)$prop_ambig_selection
## t = 2.5909, df = 26, p-value = 0.01549
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09617591 0.70664954
## sample estimates:
##       cor 
## 0.4529988

Fully Ambiguous Condition - Test Block 2

#Fully Ambiguous - test block 2
cor.test(subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==2)$accuracy,subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==2)$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==  and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==     2)$accuracy and     2)$prop_ambig_selection
## t = 1.8848, df = 26, p-value = 0.07069
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03028924  0.63734347
## sample estimates:
##       cor 
## 0.3467055

Partially Ambiguous Condition - Test Block 1

#Partially Ambiguous - test block 1
cor.test(subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==1)$accuracy,subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==1)$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half, ambiguity_condition == "partially ambiguous" &     testHalf == 1)$accuracy and     testHalf == 1)$prop_ambig_selection
## t = 0.55366, df = 32, p-value = 0.5837
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2489571  0.4216846
## sample estimates:
##        cor 
## 0.09740938

Partially Ambiguous Condition - Test Block 2

#Partially Ambiguous - test block 2
cor.test(subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==2)$accuracy,subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==2)$prop_ambig_selection)
## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half, ambiguity_condition == "partially ambiguous" &     testHalf == 2)$accuracy and     testHalf == 2)$prop_ambig_selection
## t = -0.67859, df = 32, p-value = 0.5023
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4395666  0.2282537
## sample estimates:
##        cor 
## -0.1191055
Plot
#Experiment S1 - sampling-test
pS1_fullsampling_test <- ggplot(filter(subj_test_half,ambiguity_condition=="ambiguous"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  scale_color_manual(limits=c("ambiguous"),
                     labels=c("fully ambiguous"),
                     values=c("#E41A1C"))+
  #geom_point(position=position_jitter(width=.05,height=.0))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  #ylim(0,1.08)+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=12)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Proportion of Ambiguous Selections\n\nFully Ambiguous Condition")+
  facet_wrap(~testHalf_name)
pS1_partialsampling_test <- ggplot(filter(subj_test_half,ambiguity_condition=="partially ambiguous"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  scale_color_manual(limits=c("partially ambiguous"),
                     labels=c("partially ambiguous"),
                     values=c("#377EB8"))+
  #geom_point(position=position_jitter(width=.05,height=.0))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  theme_classic()+
  #ylim(0,1.08)+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  theme_classic(base_size=12)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Proportion of Ambiguous Selections\n\nPartially Ambiguous Condition")+
  facet_wrap(~testHalf_name)
plot_grid(pS1_fullsampling_test,pS1_partialsampling_test,labels=c("A","B"),rel_widths=c(1,1),label_size=24,nrow=1)