Sampling to learn words: Adults and children sample words that reduce referential ambiguity

Analysis walkthrough for the paper

Zettersten, M., & Saffran, J. (accepted). Sampling to learn words: Adults and children sample words that reduce referential ambiguity. Developmental Science.

For more background information and the underlying data, visit the OSF repository for this report: OSF site

See crossact_codebook for information about individual columns.

Reading in data and preliminary processing

# Read the trial-level data set; `data_path` is not defined in this section
d <- read.csv(file = data_path)

Exclusions

Overview of exclusions in each experiment.

## Exclusions: participants and number of exclusions per experiment
exclusions <- d %>%
  # collapse to one row per participant, keeping the first Exclude value
  group_by(subject, experiment_name) %>%
  summarise(Exclude = Exclude[1]) %>%
  ungroup() %>%
  # every value other than "N" is counted as an exclusion
  group_by(experiment_name) %>%
  summarise(
    N = n(),
    num_exclusions = sum(Exclude != "N")
  )
kable(exclusions)

experiment_name	N	num_exclusions
Experiment 1	31	3
Experiment 2	40	2
Experiment 3	58	2
Experiment S1	62	0

# Keep only rows of participants who were not excluded (Exclude == "N")
d <- filter(d, Exclude == "N")

Demographics

Overview of demographic characteristics of each sample.

## Demographics per age group and experiment (excluded participants removed)
demographics <- d %>%
  filter(Exclude == "N") %>%
  # one row per participant: take the first recorded value of each demographic field
  group_by(subject, age_group, experiment_name) %>%
  summarize(
    Gender = Gender[1],
    Age = Age[1],
    L1_english = L1_english[1],
    languages_besides_english_yn = languages_besides_english_yn[1],
    L1 = L1[1],
    L1percent = L1percent[1]
  ) %>%
  ungroup() %>%
  group_by(age_group, experiment_name) %>%
  summarize(
    N = n(),
    gender_f = sum(Gender == "female"),
    mean_age = round(mean(Age, na.rm = TRUE), 2),
    sd_age = round(sd(Age, na.rm = TRUE), 2),
    min_age = round(min(Age, na.rm = TRUE), 2),
    max_age = round(max(Age, na.rm = TRUE), 2),
    # if any L1_english value in the group is NA, count from the L1 column instead
    native_english = ifelse(
      is.na(sum(L1_english == "English")),
      sum(L1 == "English", na.rm = TRUE),
      sum(L1_english == "English", na.rm = TRUE)
    ),
    # the two language measures below are reported as NA for the "adults" age group
    languages_besides_english = ifelse(
      age_group[1] == "adults",
      NA,
      sum(languages_besides_english_yn == "Yes", na.rm = TRUE)
    ),
    # "monolingual" counts participants with L1percent of at least 90
    monolingual = ifelse(
      age_group[1] == "adults",
      NA,
      sum(L1percent >= 90, na.rm = TRUE)
    )
  )

Experiment 1

# Demographics table restricted to Experiment 1
kable(filter(demographics, experiment_name == "Experiment 1"))

age_group	experiment_name	N	gender_f	mean_age	sd_age	min_age	max_age	native_english	languages_besides_english	monolingual
adults	Experiment 1	28	8	31.39	7.25	19	48	28	NA	NA

Experiment 2

Gender, Age, Native Language

# Demographics table restricted to Experiment 2
kable(filter(demographics, experiment_name == "Experiment 2"))

age_group	experiment_name	N	gender_f	mean_age	sd_age	min_age	max_age	native_english	languages_besides_english	monolingual
kids	Experiment 2	38	19	5.9	1.19	4.1	8.12	38	6	32

Ethnicity

# Count Experiment 2 participants for each hispanic x ethnicity combination
d %>%
  filter(Exclude == "N" & experiment_name == "Experiment 2") %>%
  # one row per participant, using the first recorded values
  group_by(subject) %>%
  summarise(
    hispanic = hispanic[1],
    ethnicity = ethnicity[1]
  ) %>%
  ungroup() %>%
  group_by(hispanic, ethnicity) %>%
  summarise(count = n()) %>%
  kable()

hispanic	ethnicity	count
No	Asian	1
No	Asian,White	2
No	Black or African American,White	1
No	Other	1
No	White	30
Yes	Other	1
Yes	White	1
NA	White	1

Experiment 3

Note that we are currently missing some demographic information (ethnicity & language) for 6 of the 56 participants, due to experimenter error.

Gender, Age, Native Language

# Demographics table restricted to Experiment 3
kable(filter(demographics, experiment_name == "Experiment 3"))

age_group	experiment_name	N	gender_f	mean_age	sd_age	min_age	max_age	native_english	languages_besides_english	monolingual
kids	Experiment 3	56	33	5.53	1.18	3.29	7.88	50	17	38

Ethnicity

# Count Experiment 3 participants for each hispanic x ethnicity combination
d %>%
  filter(Exclude == "N" & experiment_name == "Experiment 3") %>%
  # one row per participant, using the first recorded values
  group_by(subject) %>%
  summarise(
    hispanic = hispanic[1],
    ethnicity = ethnicity[1]
  ) %>%
  ungroup() %>%
  group_by(hispanic, ethnicity) %>%
  summarise(count = n()) %>%
  kable()

hispanic	ethnicity	count
No	Asian	8
No	Asian,Black or African American	1
No	Asian,White	1
No	Black or African American	1
No	Black or African American,White	1
No	Black or African American,White,Other	1
No	Other	1
No	White	33
Yes	Other	1
Yes	White	1
NA	White	1
NA	NA	6

Experiment S1

# Demographics table restricted to Experiment S1
kable(filter(demographics, experiment_name == "Experiment S1"))

age_group	experiment_name	N	gender_f	mean_age	sd_age	min_age	max_age	native_english	languages_besides_english	monolingual
adults	Experiment S1	62	27	19.13	1.01	18	22	56	NA	NA

Summarizing data by participant

First, we summarize sampling and test behavior by participant and store these objects for later plotting and analysis.

Sampling

# Per-participant summaries of sampling behavior,
# reused by the later analyses and plots

# Selection trials by subject: every selection whose selectionType is not "low"
# is counted as an ambiguous selection
subj_selection <- d %>%
  filter(trialType == "selection") %>%
  group_by(experiment_name, ambiguity_condition, subject) %>%
  summarise(
    N = n(),
    prop_ambig_selection = sum(selectionType != "low") / N,
    num_ambig_selection = sum(selectionType != "low")
  )

# interactive table, ordered by experiment, condition and subject
DT::datatable(
  arrange(subj_selection, experiment_name, ambiguity_condition, subject)
)

Overall Test

# Test performance by subject: number of test trials and mean accuracy
# (trials with a missing isRight value are ignored in the mean)
subj_test <- d %>%
  filter(trialType == "test") %>%
  group_by(experiment_name, ambiguity_condition, subject) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )
subj_test %>%
  arrange(experiment_name, ambiguity_condition, subject) %>%
  DT::datatable()

Test By Item

# Test performance by subject, split by item type (ambiguous vs. non-ambiguous);
# trials with a missing isRight value are ignored in the mean
subj_test_item <- d %>%
  filter(trialType == "test") %>%
  group_by(
    experiment_name, ambiguity_condition, subject,
    targetType, targetIsAmbiguous, targetIsAmbiguousYN
  ) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )
subj_test_item %>%
  arrange(experiment_name, ambiguity_condition, subject) %>%
  DT::datatable()

Experiment 1

Sampling

Analysis of learners’ sampling preferences.

Plot

Plot is based on estimates from the logistic mixed-effects model

# Build a one-row data frame of model predictions for the plot.
# The model is an intercept-only logistic mixed model of ambiguous selections
# in Experiment 1 with random intercepts for subject and chosen image.
m <- glmer(
  isAmbiguous ~ (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment 1" & trialType == "selection" & ambiguity_condition == "ambiguous"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
model_pred <- data.frame(ambiguity_condition = "ambiguous")
pY <- predictSE(m, model_pred, type = "response")
# interval is fit +/- 1.96 * SE, computed on the response (probability) scale
model_pred <- mutate(
  model_pred,
  experiment_name = "Experiment 1",
  prop_ambiguous = pY$fit,
  prop_ambiguous_lower_ci = pY$fit - 1.96 * pY$se.fit,
  prop_ambiguous_upper_ci = pY$fit + 1.96 * pY$se.fit
)

# Plot: bar and error bar from the model prediction, dots from the
# per-participant proportions in subj_selection
p_exp1_sampling <- ggplot(
  subset(model_pred, ambiguity_condition == "ambiguous"),
  aes(
    x = experiment_name,
    y = prop_ambiguous,
    color = ambiguity_condition,
    fill = ambiguity_condition
  )
) +
  geom_bar(stat = "identity", size = 2.5, fill = "white", width = 0.5) +
  geom_dotplot(
    data = subset(subj_selection, experiment_name == "Experiment 1" & ambiguity_condition == "ambiguous"),
    aes(y = prop_ambig_selection),
    binaxis = "y",
    stackdir = "center",
    alpha = 0.5,
    dotsize = 0.6
  ) +
  geom_errorbar(
    aes(ymin = prop_ambiguous_lower_ci, ymax = prop_ambiguous_upper_ci),
    width = 0,
    size = 1.2
  ) +
  ylab("Probability of \nAmbiguous Selection") +
  # dotted reference line at 0.5
  geom_hline(yintercept = 0.5, linetype = "dotted") +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(0, 1.08)) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  scale_x_discrete(name = "Experiment 1") +
  theme_classic(base_size = 24) +
  theme(
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )
p_exp1_sampling

Descriptives

# Descriptives for Experiment 1: mean proportion of ambiguous selections
# across participants with a t-based 95% confidence interval
subj_summary_1 <- subj_selection %>%
  filter(experiment_name == "Experiment 1") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_ambiguous = mean(prop_ambig_selection),
    # half-width of the interval: t quantile times the standard error of the mean
    ci_ambiguous = qt(0.975, N - 1) * sd(prop_ambig_selection, na.rm = TRUE) / sqrt(N),
    prop_ambiguous_lower_ci = prop_ambiguous - ci_ambiguous,
    prop_ambiguous_upper_ci = prop_ambiguous + ci_ambiguous
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_1)

ambiguity_condition	N	prop_ambiguous	prop_ambiguous_lower_ci	prop_ambiguous_upper_ci
ambiguous	28	0.625	0.5084928	0.7415072

Logistic mixed-effects model

# Intercept-only logistic mixed model of ambiguous selections in Experiment 1,
# with random intercepts for subject and chosen image; singular-fit checks are ignored
m <- glmer(
  isAmbiguous ~ (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment 1" & trialType == "selection" & ambiguity_condition == "ambiguous"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 1" & trialType == "selection" &  
##     ambiguity_condition == "ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    150.9    159.1    -72.5    144.9      109 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4998 -0.9396  0.5155  0.6668  1.0642 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.845    0.9193  
##  choiceImage (Intercept) 0.000    0.0000  
## Number of obs: 112, groups:  subject, 28; choiceImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.6155     0.2845   2.163   0.0305 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Wald CI for the fixed-effect intercept; selected by name via parm="beta_" rather than
# by row position, which depends on how many variance parameters the model has
confint(m, method = "Wald", parm = "beta_")["(Intercept)", ]

##      2.5 %     97.5 % 
## 0.05777627 1.17315329

Note that we ignore a singular fit warning here. This appears to be caused by the inclusion of the by-item (choiceImage) random intercept - however, it does not appear to adversely affect the model fit, and a simplified model with the by-item random intercept removed yields virtually identical results.

Non-parametric test (Wilcoxon)

# Wilcoxon signed rank test of per-participant proportions against mu = 1/2
wilcox.test(filter(subj_selection, experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous")$prop_ambig_selection, mu = 1/2, conf.int = TRUE, conf.level = 0.95)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 1" & ambiguity_condition ==     "ambiguous")$prop_ambig_selection
## V = 151.5, p-value = 0.02026
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000775 0.8750528
## sample estimates:
## (pseudo)median 
##       0.625024

Test

Analysis of learners’ test performance.

Plot

## Experiment 1 test accuracy split by item type, with within-subjects corrected CIs
subj_summary_test_item_1 <- summarySEwithin(
  filter(subj_test_item, experiment_name == "Experiment 1"),
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = c("targetIsAmbiguousYN"),
  idvar = "subject"
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) %>%
  select(-accuracy_norm, -sd, -se, -ci)

## By-item test plot: group means as outlined bars, participants as jittered points
ggplot(
  subj_summary_test_item_1,
  aes(
    x = targetIsAmbiguousYN,
    y = accuracy,
    color = targetIsAmbiguousYN,
    fill = targetIsAmbiguousYN
  )
) +
  geom_bar(
    stat = "identity",
    size = 1.5,
    position = position_dodge(.53),
    width = 0.5,
    alpha = 0
  ) +
  geom_point(
    data = filter(subj_test_item, experiment_name == "Experiment 1"),
    aes(fill = targetIsAmbiguousYN),
    alpha = 0.5,
    position = position_jitterdodge(dodge.width = 0.53, jitter.width = 0.1, jitter.height = 0.02)
  ) +
  geom_errorbar(
    aes(ymin = lower_ci, ymax = upper_ci),
    width = 0.0,
    size = 0.8,
    position = position_dodge(.53)
  ) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_x_discrete(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "fully\nambiguous")
  ) +
  ylab("Test Accuracy") +
  # dashed reference line at 1/8
  geom_hline(yintercept = 1/8, linetype = "dashed", size = 1.1) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none", axis.text.x = element_text(size = 15))

Descriptives

Overall Test Accuracy

## Descriptives: overall test accuracy in Experiment 1 with a t-based 95% CI
subj_summary_test_1 <- subj_test %>%
  filter(experiment_name == "Experiment 1") %>%
  group_by(experiment_name, ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_correct = mean(accuracy, na.rm = TRUE),
    # half-width of the interval: t quantile times the standard error of the mean
    ci = qt(0.975, N - 1) * sd(accuracy, na.rm = TRUE) / sqrt(N),
    prop_correct_lower_ci = prop_correct - ci,
    prop_correct_upper_ci = prop_correct + ci
  ) %>%
  select(-ci)
kable(subj_summary_test_1)

experiment_name	ambiguity_condition	N	prop_correct	prop_correct_lower_ci	prop_correct_upper_ci
Experiment 1	ambiguous	28	0.65625	0.5265768	0.7859232

Test Accuracy Split by Item

# Table of Experiment 1 test accuracy by item type
subj_summary_test_item_1 %>%
  kable()

experiment_name	ambiguity_condition	targetIsAmbiguousYN	N	accuracy	lower_ci	upper_ci
Experiment 1	ambiguous	no	28	0.6785714	0.5909601	0.7661827
Experiment 1	ambiguous	yes	28	0.6339286	0.5463173	0.7215399

Logistic mixed-effects model

Overall Test Accuracy

## Test overall accuracy against chance: logit(1/8) enters as an offset,
## so the intercept is estimated relative to that level
d[["offset.125"]] <- 1/8
m <- glmer(
  isRight ~ offset(logit(offset.125)) + (1 | subject) + (1 | targetImage),
  data = filter(d, experiment_name == "Experiment 1" & trialType == "test"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## isRight ~ offset(logit(offset.125)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 1" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    236.0    246.3   -115.0    230.0      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1422 -0.4954  0.2084  0.5398  2.0369 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 5.30551  2.3034  
##  targetImage (Intercept) 0.09789  0.3129  
## Number of obs: 224, groups:  subject, 28; targetImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.1893     0.5097   6.258 3.91e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing Difference between Items

## Test the difference between item types.
## Centered predictor: 0.5 when targetIsAmbiguous is 1, -0.5 when it is 0, NA otherwise
## (%in% never yields NA, so missing values fall through to NA)
d$targetIsAmbiguousC <- ifelse(
  d$targetIsAmbiguous %in% 1,
  0.5,
  ifelse(d$targetIsAmbiguous %in% 0, -0.5, NA)
)
m <- glmer(
  isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC | subject) + (1 | targetImage),
  data = filter(d, (experiment_name == "Experiment 1") & trialType == "test"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment 1") & trialType ==  
##     "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    233.5    253.9   -110.7    221.5      218 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0032 -0.4774  0.0571  0.3672  1.7067 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr 
##  subject     (Intercept)        9.14612  3.024         
##              targetIsAmbiguousC 8.80532  2.967    -0.98
##  targetImage (Intercept)        0.01905  0.138         
## Number of obs: 224, groups:  subject, 28; targetImage, 8
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          1.6280     0.7177   2.268   0.0233 *
## targetIsAmbiguousC  -1.5652     0.9516  -1.645   0.1000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC -0.823

# Wald CIs for the fixed effects only; parm="beta_" selects them by kind rather than
# by row position, which depends on how many variance parameters the model has
confint(m, method = "Wald", parm = "beta_")

##                         2.5 %    97.5 %
## (Intercept)         0.2213282 3.0345876
## targetIsAmbiguousC -3.4303089 0.2999958

Relationship between sampling and test accuracy

# Attach each participant's test summary to their sampling summary
# NOTE: both tables have a column N, so the join suffixes the two copies
subj_selection <- left_join(
  subj_selection,
  subj_test,
  by = c("subject", "experiment_name", "ambiguity_condition")
)

# Pearson correlation between test accuracy and the proportion of ambiguous
# selections (Experiment 1, ambiguous condition)
cor.test(
  subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$accuracy,
  subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==     "Experiment 1")$accuracy and     "Experiment 1")$prop_ambig_selection
## t = 3.6465, df = 26, p-value = 0.001167
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2664476 0.7845213
## sample estimates:
##       cor 
## 0.5817003

Plot correlation between sampling preference and test accuracy

# Test accuracy against proportion of ambiguous selections (Experiment 1):
# one violin and dot stack per observed proportion, plus a linear fit across participants
p_exp1_sampling_test <- ggplot(filter(subj_selection,experiment_name=="Experiment 1"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
  geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
  geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
  scale_color_manual(limits=c("ambiguous"),
                     values=c("#E41A1C"))+
  geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
  scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
  # a single complete theme; an earlier bare theme_classic() was fully overridden by this one
  theme_classic(base_size=24)+
  ylab("Test Accuracy")+
  theme(legend.position="none")+
  xlab("Probability of \nAmbiguous Selection")
p_exp1_sampling_test

Experiment 2

Sampling

Analysis of learners’ sampling preferences.

Plot

# Histogram of the number of ambiguous selections per participant (Experiment 2)
ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
  scale_fill_brewer(palette="Set1",direction=-1)+
  scale_color_brewer(palette="Set1",direction=-1)+
  geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
  # dashed reference line at 4/3 selections
  geom_vline(xintercept=4/3,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16),
        legend.position="none")+
  ylab("Number of subjects")+
  xlab("Number of ambiguous selections")+
  #geom_density(aes(group=1, y=..count../1.3))+
  # Bars are 0.5 wide, so a bar centred at 4 extends to 4.25. Scale limits censor anything
  # outside them, so the upper limit is padded to 4.5 (like the lower one) to keep that bar.
  scale_x_continuous(breaks=c(0,1,2,3,4), limits=c(-0.5,4.5))+
  # NOTE(review): a bar taller than 20 would likewise be dropped by this y limit - confirm counts
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30), limits=c(0,20))

Descriptives

# Descriptives for Experiment 2: mean proportion of ambiguous selections
# across participants with a t-based 95% confidence interval
subj_summary_2 <- subj_selection %>%
  filter(experiment_name == "Experiment 2") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_ambiguous = mean(prop_ambig_selection),
    # half-width of the interval: t quantile times the standard error of the mean
    ci_ambiguous = qt(0.975, N - 1) * sd(prop_ambig_selection, na.rm = TRUE) / sqrt(N),
    prop_ambiguous_lower_ci = prop_ambiguous - ci_ambiguous,
    prop_ambiguous_upper_ci = prop_ambiguous + ci_ambiguous
  ) %>%
  select(-ci_ambiguous)
kable(subj_summary_2)

ambiguity_condition	N	prop_ambiguous	prop_ambiguous_lower_ci	prop_ambiguous_upper_ci
ambiguous	38	0.3289474	0.2712987	0.386596

Logistic mixed-effects model

# Test ambiguous selections in Experiment 2 against 1/3: logit(1/3) enters as an offset,
# so the intercept is estimated relative to that level
d[["offset.33"]] <- 1/3
m <- glmer(
  isAmbiguous ~ offset(logit(offset.33)) + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment 2" & trialType == "selection"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ offset(logit(offset.33)) + (1 | subject) + (1 |  
##     choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    198.6    207.6    -96.3    192.6      149 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7001 -0.7001 -0.7001  1.4283  1.4283 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0        0       
##  choiceImage (Intercept) 0        0       
## Number of obs: 152, groups:  subject, 38; choiceImage, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.0198     0.1726  -0.115    0.909

# Wald CI for the fixed-effect intercept; selected by name via parm="beta_" rather than
# by row position, which depends on how many variance parameters the model has
confint(m, method = "Wald", parm = "beta_")["(Intercept)", ]

##      2.5 %     97.5 % 
## -0.3581671  0.3185617

Non-parametric test (Wilcoxon)

# Wilcoxon signed rank test of per-participant proportions against mu = 1/3
wilcox.test(filter(subj_selection,experiment_name=="Experiment 2")$prop_ambig_selection, mu = 1/3, conf.int = TRUE, conf.level = 0.95)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 2")$prop_ambig_selection
## V = 409, p-value = 0.5734
## alternative hypothesis: true location is not equal to 0.3333333
## 95 percent confidence interval:
##  0.2500390 0.3750141
## sample estimates:
## (pseudo)median 
##      0.3749301

Relationship with Age

# Predict ambiguous selections in Experiment 2 from age, with random intercepts
# for subject and chosen image; singular-fit checks are ignored
m <- glmer(
  isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment 2" & trialType == "selection"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    200.6    212.7    -96.3    192.6      148 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7034 -0.7006 -0.6989  1.4260  1.4334 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0        0       
##  choiceImage (Intercept) 0        0       
## Number of obs: 152, groups:  subject, 38; choiceImage, 6
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.737329   0.881389  -0.837    0.403
## Age          0.004131   0.146437   0.028    0.977
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.981

# Wald CIs for the fixed effects only; parm="beta_" selects them by kind rather than
# by row position, which depends on how many variance parameters the model has
confint(m, method = "Wald", parm = "beta_")

##                  2.5 %    97.5 %
## (Intercept) -2.4648196 0.9901613
## Age         -0.2828792 0.2911415

#plot model predictions
# prediction grid: from the minimum to the maximum Age among Experiment 2 selection trials, in steps of 0.1
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=TRUE),max(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=TRUE),by=0.1))
# re.form=NA is passed through to the prediction call
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
# band is fit +/- 1 standard error (not 1.96 * SE)
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
  # horizontal violins and jittered points show the trial-level 0/1 selections by age
  geom_violinh(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=FALSE)+
  geom_jitter(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
  geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
  # dotted reference line at 1/3
  geom_hline(yintercept=1/3,linetype="dotted")+
  theme_classic(base_size=16)+
  xlab("Age (in years)")+
  scale_x_continuous(breaks=c(3,4,5,6,7,8))+
  ylab("Proportion of ambiguous selections")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))

Test

Plot

# Experiment 2 test accuracy across participants by item type (summarySEwithin intervals)
subj_summary_item_2 <- summarySEwithin(
  filter(subj_test_item, (experiment_name == "Experiment 2")),
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = c("targetIsAmbiguousYN"),
  idvar = "subject"
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) %>%
  select(-accuracy_norm, -sd, -se, -ci)

## Plot: group means as outlined bars, participants as stacked dots
ggplot(
  subj_summary_item_2,
  aes(
    x = targetIsAmbiguousYN,
    y = accuracy,
    color = targetIsAmbiguousYN,
    fill = targetIsAmbiguousYN
  )
) +
  geom_bar(
    stat = "identity",
    size = 1.5,
    position = position_dodge(.53),
    width = 0.5,
    alpha = 0
  ) +
  geom_dotplot(
    data = filter(subj_test_item, experiment_name == "Experiment 2"),
    alpha = 0.6,
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.8
  ) +
  geom_errorbar(
    aes(ymin = lower_ci, ymax = upper_ci),
    width = 0.0,
    size = 0.6,
    position = position_dodge(.53)
  ) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_x_discrete(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous")
  ) +
  ylab("Test Accuracy") +
  # dashed reference line at 1/6
  geom_hline(yintercept = 1/6, linetype = "dashed", size = 1.1) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none", axis.text.x = element_text(size = 15))

Descriptives

Overall Test Accuracy

## Descriptives: overall test accuracy in Experiment 2 with a t-based 95% CI
subj_summary_test_2 <- subj_test %>%
  filter(experiment_name == "Experiment 2") %>%
  group_by(experiment_name, ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_correct = mean(accuracy, na.rm = TRUE),
    # half-width of the interval: t quantile times the standard error of the mean
    ci = qt(0.975, N - 1) * sd(accuracy, na.rm = TRUE) / sqrt(N),
    prop_correct_lower_ci = prop_correct - ci,
    prop_correct_upper_ci = prop_correct + ci
  ) %>%
  select(-ci)
kable(subj_summary_test_2)

experiment_name	ambiguity_condition	N	prop_correct	prop_correct_lower_ci	prop_correct_upper_ci
Experiment 2	ambiguous	38	0.3859649	0.3073429	0.4645869

Test Accuracy Split by Item

# Table of Experiment 2 test accuracy by item type
kable(subj_summary_item_2)

experiment_name	ambiguity_condition	targetIsAmbiguousYN	N	accuracy	lower_ci	upper_ci
Experiment 2	ambiguous	no	38	0.3355263	0.2484583	0.4225943
Experiment 2	ambiguous	yes	38	0.4868421	0.3997741	0.5739101

Logistic Mixed-Effects Model

Overall Test Accuracy

## Test overall accuracy against chance: logit(1/6) enters as an offset,
## so the intercept is estimated relative to that level
d[["offset.17"]] <- 1/6
m <- glmer(
  isRight ~ offset(logit(offset.17)) + (1 | subject) + (1 | targetImage),
  data = filter(d, experiment_name == "Experiment 2" & trialType == "test"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ offset(logit(offset.17)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 2" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    306.6    316.9   -150.3    300.6      224 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0598 -0.7565 -0.6739  1.0550  1.4839 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.3354   0.5791  
##  targetImage (Intercept) 0.0000   0.0000  
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.1131     0.1723   6.461 1.04e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test Accuracy by Item Type

## Accuracy by item type in Experiment 2
## Logistic mixed-effects model with the centered item-type predictor, a by-subject
## random slope, and a random intercept for target image
m <- glmer(
  isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC | subject) + (1 | targetImage),
  data = subset(d, trialType == "test" & experiment_name == "Experiment 2"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    307.5    328.0   -147.7    295.5      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3639 -0.7441 -0.5889  1.0369  1.6980 
## 
## Random effects:
##  Groups      Name               Variance  Std.Dev. Corr
##  subject     (Intercept)        0.3840684 0.61973      
##              targetIsAmbiguousC 0.0001614 0.01271  1.00
##  targetImage (Intercept)        0.0000000 0.00000      
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -0.3970     0.1824  -2.176   0.0296 *
## targetIsAmbiguousC   0.6795     0.3046   2.231   0.0257 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC 0.209

# Wald CIs for the fixed effects only; parm="beta_" selects them by kind rather than
# by row position, which depends on how many variance parameters the model has
confint(m, method = "Wald", parm = "beta_")

##                          2.5 %      97.5 %
## (Intercept)        -0.75450961 -0.03940061
## targetIsAmbiguousC  0.08253043  1.27644793

Ambiguous Item Choices

Which items (ambiguous vs. disambiguated) do participants choose on each test trial type? Here, we asked what proportion of the time participants select one of the two ambiguous items when the target label is for an ambiguous vs. a disambiguated item (regardless of accuracy).

## Classify each test-trial choice by the type of item chosen:
## "ambiguous" when the chosen image equals High1 or High2, "disambiguated" for any
## other test-trial choice, and NA on non-test trials
d$testChoiceType <- with(
  d,
  ifelse(
    trialType == "test" & (as.character(choiceImage) == High1 | as.character(choiceImage) == High2),
    "ambiguous",
    ifelse(trialType == "test", "disambiguated", NA)
  )
)

# Per participant and target type: proportion of Experiment 2 test trials on which
# an ambiguous item was chosen (missing choice types are ignored)
subj_test_choiceType_2 <- d %>%
  filter(trialType == "test" & experiment_name == "Experiment 2") %>%
  group_by(
    subject, experiment_name, ambiguity_condition,
    targetType, targetIsAmbiguous, targetIsAmbiguousYN
  ) %>%
  summarize(
    N = n(),
    ambiguous_choice = mean(testChoiceType == "ambiguous", na.rm = TRUE)
  )

# Summarize across participants (summarySEwithin intervals)
subj_summary_choiceType_2 <- summarySEwithin(
  subj_test_choiceType_2,
  "ambiguous_choice",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = c("targetIsAmbiguousYN"),
  idvar = "subject"
) %>%
  mutate(
    lower_ci = ambiguous_choice - ci,
    upper_ci = ambiguous_choice + ci
  ) %>%
  select(-ambiguous_choice_norm, -sd, -se, -ci)
kable(subj_summary_choiceType_2)

experiment_name	ambiguity_condition	targetIsAmbiguousYN	N	ambiguous_choice	lower_ci	upper_ci
Experiment 2	ambiguous	no	38	0.1842105	0.0798448	0.2885762
Experiment 2	ambiguous	yes	38	0.6184211	0.5140554	0.7227867

## Plot: proportion of ambiguous-item choices by target item type (Experiment 2);
## group means as outlined bars, participants as stacked dots
ggplot(
  subj_summary_choiceType_2,
  aes(
    x = targetIsAmbiguousYN,
    y = ambiguous_choice,
    color = targetIsAmbiguousYN,
    fill = targetIsAmbiguousYN
  )
) +
  geom_bar(
    stat = "identity",
    size = 1.5,
    position = position_dodge(.53),
    width = 0.5,
    alpha = 0
  ) +
  geom_dotplot(
    data = subj_test_choiceType_2,
    alpha = 0.6,
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.8
  ) +
  geom_errorbar(
    aes(ymin = lower_ci, ymax = upper_ci),
    width = 0.0,
    size = 0.6,
    position = position_dodge(.53)
  ) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_x_discrete(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous")
  ) +
  ylab("Proportion Ambiguous Items Selected") +
  # dashed reference line at 1/3
  geom_hline(yintercept = 1/3, linetype = "dashed", size = 1.1) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none", axis.text.x = element_text(size = 15))

Sampling and test accuracy

Overall Accuracy for Sampled vs. Not Sampled Items

# Per-subject test accuracy in Experiment 2, split by the `chosen` column
subj_test_choice_2 <- d %>%
  filter(trialType == "test", experiment_name == "Experiment 2") %>%
  group_by(subject, experiment_name, ambiguity_condition, chosen) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )

# Summary across subjects; `ci` is converted to explicit bounds and `chosen`
# (0 vs. anything else) gets a readable label.
subj_summary_choice_2 <- summarySEwithin(
  subj_test_choice_2,
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = "chosen",
  idvar = "subject",
  na.rm = TRUE
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor = ifelse(chosen == 0, "NOT SAMPLED", "SAMPLED")
  )
kable(select(subj_summary_choice_2, -accuracy_norm, -sd, -se, -ci))

experiment_name	ambiguity_condition	chosen	N	accuracy	lower_ci	upper_ci	chosen_factor
Experiment 2	ambiguous	0	38	0.2675439	0.1621150	0.3729727	NOT SAMPLED
Experiment 2	ambiguous	1	38	0.4517544	0.3463255	0.5571833	SAMPLED

Plot by Item Type

# Per-subject test accuracy in Experiment 2 by `chosen` and item type
subj_test_item_choice_2 <- d %>%
  filter(trialType == "test", experiment_name == "Experiment 2") %>%
  group_by(
    subject, experiment_name, ambiguity_condition, chosen,
    targetType, targetIsAmbiguous, targetIsAmbiguousYN
  ) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )

# Summary across subjects with CI bounds and a label for the facets
subj_summary_item_choice_2 <- summarySEwithin(
  subj_test_item_choice_2,
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = c("chosen", "targetIsAmbiguousYN"),
  idvar = "subject",
  na.rm = TRUE
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor = ifelse(chosen == 0, "NOT SAMPLED", "SAMPLED")
  )

# Bars with CIs per item type, one panel per chosen_factor value,
# and a dashed reference line at 1/4.
ggplot(
  subj_summary_item_choice_2,
  aes(
    x = targetIsAmbiguousYN,
    y = accuracy,
    color = targetIsAmbiguousYN,
    fill = targetIsAmbiguousYN
  )
) +
  geom_bar(stat = "identity", size = 1.5, position = position_dodge(.53), width = 0.5, alpha = 0) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.0, size = 0.6, position = position_dodge(.53)) +
  geom_hline(yintercept = 1/4, linetype = "dashed", size = 1.1) +
  facet_wrap(~chosen_factor) +
  scale_x_discrete(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous")
  ) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  labs(y = "Test Accuracy") +
  theme_classic(base_size = 16) +
  theme(legend.position = "none", axis.text.x = element_text(size = 15))

Logistic Mixed-Effects Model

# Centered coding of `chosen`: 0 -> -0.5, 1 -> 0.5, anything else (incl. NA) -> NA
d$chosenC <- case_when(
  d$chosen == 0 ~ -0.5,
  d$chosen == 1 ~ 0.5,
  TRUE ~ NA_real_
)
# Test accuracy predicted from item type, chosenC, and their interaction
# (Experiment 2 test trials); singular-fit checks are switched off.
m <- glmer(
  isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC | subject) + (1 | targetImage),
  data = subset(d, trialType == "test" & experiment_name == "Experiment 2"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    302.2    329.6   -143.1    286.2      219 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5720 -0.7516 -0.4534  0.9339  2.4960 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr 
##  subject     (Intercept)        0.44890  0.6700        
##              targetIsAmbiguousC 0.03932  0.1983   -1.00
##  targetImage (Intercept)        0.00000  0.0000        
## Number of obs: 227, groups:  subject, 38; targetImage, 6
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                 -0.5662     0.2014  -2.811  0.00494 **
## targetIsAmbiguousC           0.8276     0.3441   2.405  0.01618 * 
## chosenC                      0.8896     0.3349   2.656  0.00790 **
## targetIsAmbiguousC:chosenC  -0.4927     0.6997  -0.704  0.48135   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC chosnC
## trgtIsAmbgC  0.048              
## chosenC     -0.330  0.037       
## trgtIsAmC:C  0.042 -0.412  0.158

# Wald 95% CIs for the fixed effects. parm="beta_" selects the fixed-effect
# rows by role rather than by position, so the result stays correct if the
# random-effects structure (and thus the number of variance rows) changes.
confint(m, method="Wald", parm="beta_")

##                                 2.5 %    97.5 %
## (Intercept)                -0.9610699 -0.171419
## targetIsAmbiguousC          0.1530674  1.502058
## chosenC                     0.2332100  1.545923
## targetIsAmbiguousC:chosenC -1.8640073  0.878679

Correlation between overall sampling preference and test accuracy

## Correlation between test accuracy and the proportion of ambiguous
## selections (Experiment 2, "ambiguous" condition).
## The subset is built once and both columns are evaluated inside it, which
## avoids duplicating the filter and gives the test a readable data label.
with(
  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == "Experiment 2"),
  cor.test(accuracy, prop_ambig_selection)
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==     "Experiment 2")$accuracy and     "Experiment 2")$prop_ambig_selection
## t = 1.3557, df = 36, p-value = 0.1836
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1068138  0.5045310
## sample estimates:
##       cor 
## 0.2203953

## Plot: test accuracy against proportion of ambiguous selections (Experiment 2).
## Violins and dots are grouped by each distinct x value; a linear fit is overlaid.
ggplot(
  filter(subj_selection, experiment_name == "Experiment 2"),
  aes(prop_ambig_selection, accuracy, color = ambiguity_condition)
) +
  geom_violin(aes(group = prop_ambig_selection), draw_quantiles = c(0.5)) +
  geom_dotplot(
    aes(group = prop_ambig_selection, fill = ambiguity_condition),
    alpha = 0.6, binaxis = "y", stackdir = "center", dotsize = 0.8
  ) +
  scale_color_manual(limits = c("ambiguous"), values = c("#E41A1C")) +
  geom_smooth(method = "lm", color = "black", fill = "#4B0082", alpha = 0.3) +
  scale_x_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(0, 1.08)) +
  # a single complete theme; an earlier bare theme_classic() was fully
  # replaced by this call and has been removed
  theme_classic(base_size = 24) +
  theme(legend.position = "none") +
  ylab("Test Accuracy") +
  xlab("Probability of \nAmbiguous Selection")

Experiment 3

Sampling

Analysis of learners’ sampling preferences.

Plot

# Histogram of the number of ambiguous selections per subject (Experiment 3)
ggplot(
  filter(subj_selection, experiment_name == "Experiment 3"),
  aes(
    x = num_ambig_selection,
    fill = as.factor(num_ambig_selection),
    color = as.factor(num_ambig_selection)
  )
) +
  geom_bar(stat = "count", size = 1.5, alpha = 0.2, width = 0.5) +
  # reversed Set1 palette for both outline and fill
  scale_fill_brewer(palette = "Set1", direction = -1) +
  scale_color_brewer(palette = "Set1", direction = -1) +
  scale_x_continuous(breaks = c(0, 1, 2)) +
  scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30)) +
  xlab("Number of ambiguous selections") +
  ylab("Number of subjects") +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16),
    legend.position = "none"
  )

Descriptives

# Descriptives: mean proportion of ambiguous selections in Experiment 3 with a
# t-based 95% CI (half-width = t quantile * sd / sqrt(N)).
subj_summary_3 <- subj_selection %>%
  filter(experiment_name == "Experiment 3") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_ambiguous = mean(prop_ambig_selection),
    ci_ambiguous = qt(0.975, N - 1) * sd(prop_ambig_selection, na.rm = TRUE) / sqrt(N),
    prop_ambiguous_lower_ci = prop_ambiguous - ci_ambiguous,
    prop_ambiguous_upper_ci = prop_ambiguous + ci_ambiguous
  ) %>%
  # the half-width itself is not reported
  select(-ci_ambiguous)
kable(subj_summary_3)

ambiguity_condition	N	prop_ambiguous	prop_ambiguous_lower_ci	prop_ambiguous_upper_ci
ambiguous_me	56	0.6339286	0.5436849	0.7241723

Logistic mixed-effects model

# Intercept-only logistic mixed model of ambiguous selections
# (Experiment 3 selection trials), random intercepts for subject and image.
m <- glmer(
  isAmbiguous ~ 1 + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment 3" & trialType == "selection"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ 1 + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    153.1    161.3    -73.6    147.1      109 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3189 -1.3126  0.7582  0.7601  0.7618 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  subject     (Intercept) 2.663e-15 5.160e-08
##  choiceImage (Intercept) 1.529e-03 3.911e-02
## Number of obs: 112, groups:  subject, 56; choiceImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   0.5498     0.2031   2.707  0.00679 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Wald 95% CI for the intercept, selected by name from the fixed-effect rows
# (parm="beta_") instead of by row position, which depended on the number of
# random-effect parameters in the model.
confint(m, method="Wald", parm="beta_")["(Intercept)", ]

##     2.5 %    97.5 % 
## 0.1517166 0.9479760

Non-parametric test (Wilcoxon)

# One-sample Wilcoxon signed rank test of the per-subject proportion of
# ambiguous selections against 1/2, with a 95% confidence interval.
wilcox.test(
  x = filter(subj_selection, experiment_name == "Experiment 3")$prop_ambig_selection,
  mu = 1/2,
  conf.int = TRUE,
  conf.level = 0.95
)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  filter(subj_selection, experiment_name == "Experiment 3")$prop_ambig_selection
## V = 330, p-value = 0.005553
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000378 1.0000000
## sample estimates:
## (pseudo)median 
##      0.9999326

Relationship with Age

# Logistic mixed model: does Age predict ambiguous selections?
# (Experiment 3 selection trials; random intercepts for subject and image)
m <- glmer(
  isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment 3" & trialType == "selection"),
  family = binomial
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## 
##      AIC      BIC   logLik deviance df.resid 
##    148.4    159.3    -70.2    140.4      108 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2833 -1.0456  0.5800  0.7368  1.2408 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev.
##  subject     (Intercept) 1.081e-08 0.000104
##  choiceImage (Intercept) 2.176e-03 0.046648
## Number of obs: 112, groups:  subject, 56; choiceImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -1.9284     1.0046  -1.919   0.0549 .
## Age           0.4551     0.1837   2.478   0.0132 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.978

# Wald 95% CIs for the fixed effects (intercept and Age), selected with
# parm="beta_" rather than by hard-coded row positions.
confint(m, method="Wald", parm="beta_")

##                   2.5 %     97.5 %
## (Intercept) -3.89741195 0.04068293
## Age          0.09510293 0.81512510

# Plot model predictions across the observed age range.
# Prediction grid: Age from the observed minimum to maximum in steps of 0.1.
pX <- data.frame(
  Age = seq(
    min(subset(d, experiment_name == "Experiment 3" & trialType == "selection")$Age, na.rm = TRUE),
    max(subset(d, experiment_name == "Experiment 3" & trialType == "selection")$Age, na.rm = TRUE),
    by = 0.1
  )
)
pY <- predictSE(m, pX, re.form = NA, type = "response")
pX$isAmbiguous <- pY$fit
# ribbon spans the fitted value plus/minus one standard error
pX$YLower <- pY$fit - pY$se.fit
pX$YUpper <- pY$fit + pY$se.fit
ggplot(pX, aes(Age, isAmbiguous)) +
  # raw trial-level responses: horizontal violins plus jittered points
  geom_violinh(
    data = subset(d, experiment_name == "Experiment 3" & trialType == "selection"),
    aes(y = isAmbiguous, group = isAmbiguous),
    scale = "count", width = 0.1, trim = FALSE
  ) +
  geom_jitter(
    data = subset(d, experiment_name == "Experiment 3" & trialType == "selection"),
    aes(y = isAmbiguous, group = isAmbiguous),
    height = 0.01
  ) +
  # model curve with its ribbon, drawn from the precomputed columns
  geom_smooth(aes(ymin = YLower, ymax = YUpper), stat = "identity", color = "#E41A1C", fill = "#E41A1C") +
  geom_hline(yintercept = 0.5, linetype = "dotted") +
  scale_x_continuous(breaks = c(3, 4, 5, 6, 7, 8)) +
  xlab("Age (in years)") +
  ylab("Proportion of ambiguous selections") +
  theme_classic(base_size = 16) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16)
  )

Test

Analysis of learners’ test accuracy.

Plot

## Test accuracy by item type (Experiment 3): summary table and plot
subj_summary_item_3 <- summarySEwithin(
  filter(subj_test_item, experiment_name == "Experiment 3"),
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = "targetIsAmbiguousYN",
  idvar = "subject"
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  )

# Bars, dots from subj_test_item, CI bars, and a dashed reference line at 1/4
ggplot(
  subj_summary_item_3,
  aes(
    x = targetIsAmbiguousYN,
    y = accuracy,
    color = targetIsAmbiguousYN,
    fill = targetIsAmbiguousYN
  )
) +
  geom_bar(stat = "identity", size = 1.5, position = position_dodge(.53), width = 0.5, alpha = 0) +
  geom_dotplot(
    data = filter(subj_test_item, experiment_name == "Experiment 3"),
    alpha = 0.6, binaxis = "y", stackdir = "center", dotsize = 0.8
  ) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.0, size = 0.6, position = position_dodge(.53)) +
  geom_hline(yintercept = 1/4, linetype = "dashed", size = 1.1) +
  scale_x_discrete(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("mutual exclusivity", "ambiguous")
  ) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("mutual exclusivity", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("mutual exclusivity", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  labs(y = "Test Accuracy") +
  theme_classic(base_size = 16) +
  theme(legend.position = "none", axis.text.x = element_text(size = 15))

Descriptives

Overall Test Accuracy

## Descriptives: overall test accuracy in Experiment 3 with a t-based 95% CI
subj_summary_test_3 <- subj_test %>%
  filter(experiment_name == "Experiment 3") %>%
  group_by(experiment_name, ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_correct = mean(accuracy, na.rm = TRUE),
    ci = qt(0.975, N - 1) * sd(accuracy, na.rm = TRUE) / sqrt(N),
    prop_correct_lower_ci = prop_correct - ci,
    prop_correct_upper_ci = prop_correct + ci
  )
# print without the CI half-width column
kable(select(subj_summary_test_3, -ci))

experiment_name	ambiguity_condition	N	prop_correct	prop_correct_lower_ci	prop_correct_upper_ci
Experiment 3	ambiguous_me	56	0.5758929	0.4839003	0.6678855

Test Accuracy Split by Item

# Table of accuracy by item type, without the helper columns
kable(select(subj_summary_item_3, -accuracy_norm, -se, -sd, -ci))

experiment_name	ambiguity_condition	targetIsAmbiguousYN	N	accuracy	lower_ci	upper_ci
Experiment 3	ambiguous_me	no	56	0.6160714	0.5280317	0.7041111
Experiment 3	ambiguous_me	yes	56	0.5357143	0.4476746	0.6237540

Logistic Mixed-Effects Model

Overall Test Accuracy

## Logistic mixed-effects model of overall test accuracy (Experiment 3)
# constant column of 1/4, used to build the offset term in the formula
d$offset.25 <- 1 / 4
# intercept-only model with logit(1/4) entered as an offset
m <- glmer(
  isRight ~ offset(logit(offset.25)) + (1 | subject) + (1 | targetImage),
  data = filter(d, experiment_name == "Experiment 3" & trialType == "test"),
  family = binomial
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ offset(logit(offset.25)) + (1 | subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    289.9    300.2   -142.0    283.9      221 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6324 -0.6345  0.3944  0.6126  1.2762 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 2.17173  1.4737  
##  targetImage (Intercept) 0.09708  0.3116  
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.5272     0.3007   5.079 3.79e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test Accuracy by Item Type

# Constant column set to 1/4; note that the formula below does not reference it.
d$offset.25 <- 1 / 4
## Logistic mixed-effects model of test accuracy by item type (Experiment 3),
## with a by-subject random slope for item type.
m <- glmer(
  isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC | subject) + (1 | targetImage),
  data = filter(d, experiment_name == "Experiment 3" & trialType == "test"),
  family = binomial
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    293.5    314.0   -140.8    281.5      218 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7428 -0.6127  0.3710  0.5865  1.3274 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr
##  subject     (Intercept)        2.5206   1.5876       
##              targetIsAmbiguousC 0.7476   0.8646   0.39
##  targetImage (Intercept)        0.1001   0.3164       
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)          0.4427     0.3168   1.397    0.162
## targetIsAmbiguousC  -0.4392     0.3615  -1.215    0.224
## 
## Correlation of Fixed Effects:
##             (Intr)
## trgtIsAmbgC 0.064

# Wald 95% CIs for the fixed effects, selected with parm="beta_" so the call
# does not depend on how many random-effect rows precede them.
confint(m, method="Wald", parm="beta_")

##                         2.5 %    97.5 %
## (Intercept)        -0.1781934 1.0636353
## targetIsAmbiguousC -1.1478162 0.2693513

Sampling and test accuracy

Overall Accuracy for Sampled vs. Not Sampled Items

# Per-subject test accuracy in Experiment 3, split by the `chosen` column
subj_test_choice_3 <- d %>%
  filter(trialType == "test", experiment_name == "Experiment 3") %>%
  group_by(subject, experiment_name, ambiguity_condition, chosen) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )

# Summary across subjects with CI bounds and a readable label for `chosen`
subj_summary_choice_3 <- summarySEwithin(
  subj_test_choice_3,
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = "chosen",
  idvar = "subject",
  na.rm = TRUE
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor = ifelse(chosen == 0, "NOT SAMPLED", "SAMPLED")
  )
kable(select(subj_summary_choice_3, -accuracy_norm, -sd, -se, -ci))

experiment_name	ambiguity_condition	chosen	N	accuracy	lower_ci	upper_ci	chosen_factor
Experiment 3	ambiguous_me	0	56	0.5595238	0.4653227	0.6537249	NOT SAMPLED
Experiment 3	ambiguous_me	1	56	0.5982143	0.5040132	0.6924154	SAMPLED

Plot by Item Type

# Per-subject test accuracy in Experiment 3 by `chosen` and item type
subj_test_item_choice_3 <- d %>%
  filter(trialType == "test", experiment_name == "Experiment 3") %>%
  group_by(
    subject, experiment_name, ambiguity_condition, chosen,
    targetType, targetIsAmbiguous, targetIsAmbiguousYN
  ) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )

# Summary across subjects with CI bounds and a label for the facets
subj_summary_item_choice_3 <- summarySEwithin(
  subj_test_item_choice_3,
  "accuracy",
  betweenvars = c("experiment_name", "ambiguity_condition"),
  withinvars = c("chosen", "targetIsAmbiguousYN"),
  idvar = "subject",
  na.rm = TRUE
) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    chosen_factor = ifelse(chosen == 0, "NOT SAMPLED", "SAMPLED")
  )

# Bars with CIs per item type, one panel per chosen_factor value,
# and a dashed reference line at 1/4.
ggplot(
  subj_summary_item_choice_3,
  aes(
    x = targetIsAmbiguousYN,
    y = accuracy,
    color = targetIsAmbiguousYN,
    fill = targetIsAmbiguousYN
  )
) +
  geom_bar(stat = "identity", size = 1.5, position = position_dodge(.53), width = 0.5, alpha = 0) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.0, size = 0.6, position = position_dodge(.53)) +
  geom_hline(yintercept = 1/4, linetype = "dashed", size = 1.1) +
  facet_wrap(~chosen_factor) +
  scale_x_discrete(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("mutual exclusivity", "ambiguous")
  ) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("mutual exclusivity", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("mutual exclusivity", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  labs(y = "Test Accuracy") +
  theme_classic(base_size = 16) +
  theme(legend.position = "none", axis.text.x = element_text(size = 15))

Logistic Mixed-Effects Model

# Test accuracy predicted from item type, chosenC, and their interaction
# (Experiment 3 test trials), fitted with the bobyqa optimizer.
m <- glmer(
  isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC | subject) + (1 | targetImage),
  data = subset(d, trialType == "test" & experiment_name == "Experiment 3"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |  
##     subject) + (1 | targetImage)
##    Data: subset(d, trialType == "test" & experiment_name == "Experiment 3")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    296.5    323.8   -140.3    280.5      216 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9202 -0.6319  0.3743  0.5870  1.5328 
## 
## Random effects:
##  Groups      Name               Variance Std.Dev. Corr
##  subject     (Intercept)        2.47115  1.5720       
##              targetIsAmbiguousC 0.52951  0.7277   0.38
##  targetImage (Intercept)        0.09831  0.3135       
## Number of obs: 224, groups:  subject, 56; targetImage, 4
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                  0.4250     0.3188   1.333    0.182
## targetIsAmbiguousC          -0.5291     0.3671  -1.441    0.150
## chosenC                      0.3414     0.3474   0.983    0.326
## targetIsAmbiguousC:chosenC   0.2390     0.8039   0.297    0.766
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC chosnC
## trgtIsAmbgC  0.045              
## chosenC      0.001 -0.255       
## trgtIsAmC:C -0.164  0.003  0.031

# Wald 95% CIs for the four fixed effects, selected with parm="beta_" instead
# of hard-coded row positions that depend on the random-effects structure.
confint(m, method="Wald", parm="beta_")

##                                 2.5 %    97.5 %
## (Intercept)                -0.1997402 1.0497618
## targetIsAmbiguousC         -1.2487101 0.1904759
## chosenC                    -0.3395635 1.0223851
## targetIsAmbiguousC:chosenC -1.3365340 1.8145833

Correlation between overall sampling preference and test accuracy

## Relationship between sampling preference for ambiguous items and test
## accuracy (Experiment 3, "ambiguous_me" condition).
## The subset is built once and both columns are evaluated inside it, which
## avoids duplicating the filter and gives the test a readable data label.
with(
  subset(subj_selection, ambiguity_condition == "ambiguous_me" & experiment_name == "Experiment 3"),
  cor.test(accuracy, prop_ambig_selection)
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous_me" &  and subset(subj_selection, ambiguity_condition == "ambiguous_me" &     experiment_name == "Experiment 3")$accuracy and     experiment_name == "Experiment 3")$prop_ambig_selection
## t = 0.49839, df = 54, p-value = 0.6202
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1987701  0.3247892
## sample estimates:
##        cor 
## 0.06766648

## Plot: test accuracy against proportion of ambiguous selections (Experiment 3).
## Violins and dots are grouped by each distinct x value; a linear fit is overlaid.
ggplot(
  filter(subj_selection, experiment_name == "Experiment 3"),
  aes(prop_ambig_selection, accuracy, color = ambiguity_condition)
) +
  geom_violin(aes(group = prop_ambig_selection), draw_quantiles = c(0.5)) +
  geom_dotplot(
    aes(group = prop_ambig_selection, fill = ambiguity_condition),
    alpha = 0.6, binaxis = "y", stackdir = "center", dotsize = 0.8
  ) +
  scale_color_manual(limits = c("ambiguous_me"), values = c("#E41A1C")) +
  geom_smooth(method = "lm", color = "black", fill = "#4B0082", alpha = 0.3) +
  scale_x_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(0, 1.08)) +
  # a single complete theme; an earlier bare theme_classic() was fully
  # replaced by this call and has been removed
  theme_classic(base_size = 24) +
  theme(legend.position = "none") +
  ylab("Test Accuracy") +
  xlab("Probability of \nAmbiguous Selection")

Experiment S1

Sampling

Analysis of learners’ sampling preferences.

Plot

## Plot: model-based probability of an ambiguous selection per condition (Experiment S1)
# Condition coding for this model: "ambiguous" = 0, "partially ambiguous" = -1,
# every other value = NA.
d$conditionFull <- unname(
  c("ambiguous" = 0, "partially ambiguous" = -1)[as.character(d$ambiguity_condition)]
)
m <- glmer(
  isAmbiguous ~ conditionFull + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment S1" & trialType == "selection"),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
# one prediction row per condition, on the response scale
model_pred <- data.frame(
  conditionFull = c(-1, 0),
  ambiguity_condition = c("partially ambiguous", "ambiguous")
)
pY <- predictSE(m, model_pred, type = "response")
# interval bounds are the fit plus/minus 1.96 standard errors
model_pred <- mutate(
  model_pred,
  prop_ambiguous = pY$fit,
  prop_ambiguous_lower_ci = pY$fit - 1.96 * pY$se.fit,
  prop_ambiguous_upper_ci = pY$fit + 1.96 * pY$se.fit
)
p_expS1_sampling <- ggplot(
  model_pred,
  aes(
    x = ambiguity_condition,
    y = prop_ambiguous,
    color = ambiguity_condition,
    fill = ambiguity_condition
  )
) +
  geom_bar(stat = "identity", size = 2.5, fill = "white", width = 0.5) +
  # per-subject proportions from subj_selection
  geom_dotplot(
    data = subset(subj_selection, experiment_name == "Experiment S1"),
    aes(y = prop_ambig_selection),
    binaxis = "y", stackdir = "center", alpha = 0.5, dotsize = 0.6
  ) +
  geom_errorbar(
    aes(ymin = prop_ambiguous_lower_ci, ymax = prop_ambiguous_upper_ci),
    width = 0, size = 1.2
  ) +
  geom_hline(yintercept = 0.5, linetype = "dotted") +
  scale_x_discrete(name = "Condition") +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(0, 1.08)) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  labs(y = "Probability of \nAmbiguous Selection") +
  theme_classic(base_size = 18) +
  theme(legend.position = "none")
p_expS1_sampling

Descriptives

# Descriptives: mean proportion of ambiguous selections per condition in
# Experiment S1 with a t-based 95% CI (half-width = t quantile * sd / sqrt(N)).
subj_summary_s1 <- subj_selection %>%
  filter(experiment_name == "Experiment S1") %>%
  group_by(ambiguity_condition) %>%
  summarize(
    N = n(),
    prop_ambiguous = mean(prop_ambig_selection),
    ci_ambiguous = qt(0.975, N - 1) * sd(prop_ambig_selection, na.rm = TRUE) / sqrt(N),
    prop_ambiguous_lower_ci = prop_ambiguous - ci_ambiguous,
    prop_ambiguous_upper_ci = prop_ambiguous + ci_ambiguous
  ) %>%
  # the half-width itself is not reported
  select(-ci_ambiguous)
kable(subj_summary_s1)

ambiguity_condition	N	prop_ambiguous	prop_ambiguous_lower_ci	prop_ambiguous_upper_ci
ambiguous	28	0.6428571	0.5359183	0.7497960
partially ambiguous	34	0.4779412	0.3910626	0.5648197

Logistic mixed-effects model

Condition Comparison

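Estimating the difference in preference for selecting ambiguous items between the Fully Ambiguous condition (coded 0.5) and the Partially Ambiguous condition (coded -0.5). A positive coefficient indicates a stronger preference for ambiguous items in the Fully Ambiguous condition.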

# Centered condition coding: "ambiguous" = 0.5, "partially ambiguous" = -0.5,
# every other value = NA.
d$conditionC <- unname(
  c("ambiguous" = 0.5, "partially ambiguous" = -0.5)[as.character(d$ambiguity_condition)]
)
# Condition effect on ambiguous selections (Experiment S1 selection trials)
m <- glmer(
  isAmbiguous ~ conditionC + (1 | subject) + (1 | choiceImage),
  data = subset(d, (experiment_name == "Experiment S1") & trialType == "selection"),
  family = binomial,
  control = glmerControl(check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionC + (1 | subject) + (1 | choiceImage)
##    Data: subset(d, (experiment_name == "Experiment S1") & trialType ==  
##     "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  subject     (Intercept) 1.151e-01 3.393e-01
##  choiceImage (Intercept) 8.752e-10 2.958e-05
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.2569     0.1401   1.833   0.0668 .
## conditionC    0.6955     0.2815   2.471   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## conditionC 0.145

# Wald 95% CIs for the fixed effects (intercept and conditionC), selected with
# parm="beta_" rather than by hard-coded row positions.
confint(m, method="Wald", parm="beta_")

##                   2.5 %    97.5 %
## (Intercept) -0.01777231 0.5315834
## conditionC   0.14384157 1.2472401

Fully Ambiguous Condition

Estimating the preference for (fully) ambiguous items by re-centering the model on the fully ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Fully Ambiguous condition.

## Fully Ambiguous Condition
# Coding with "ambiguous" as the zero level ("partially ambiguous" = -1,
# every other value = NA), so the intercept refers to the "ambiguous" condition.
d$conditionFull <- unname(
  c("ambiguous" = 0, "partially ambiguous" = -1)[as.character(d$ambiguity_condition)]
)

m <- glmer(
  isAmbiguous ~ conditionFull + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment S1" & trialType == "selection"),
  family = binomial,
  control = glmerControl(check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionFull + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.1151   0.3393  
##  choiceImage (Intercept) 0.0000   0.0000  
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     0.6047     0.2125   2.845  0.00444 **
## conditionFull   0.6955     0.2815   2.471  0.01347 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## conditinFll 0.758

# Wald 95% CIs for the fixed effects (intercept and conditionFull), selected
# with parm="beta_" rather than by hard-coded row positions.
confint(m, method="Wald", parm="beta_")

##                   2.5 %   97.5 %
## (Intercept)   0.1881258 1.021231
## conditionFull 0.1438395 1.247238

Partially Ambiguous Condition

Estimating the preference for (partially) ambiguous items by re-centering the model on the partially ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Partially Ambiguous condition.

# Coding with "partially ambiguous" as the zero level ("ambiguous" = 1,
# every other value = NA), so the intercept refers to that condition.
d$conditionPartial <- unname(
  c("ambiguous" = 1, "partially ambiguous" = 0)[as.character(d$ambiguity_condition)]
)

m <- glmer(
  isAmbiguous ~ conditionPartial + (1 | subject) + (1 | choiceImage),
  data = subset(d, experiment_name == "Experiment S1" & trialType == "selection"),
  family = binomial,
  control = glmerControl(check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isAmbiguous ~ conditionPartial + (1 | subject) + (1 | choiceImage)
##    Data: 
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##      342      356     -167      334      244 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3825 -0.9601  0.6865  0.9394  1.0968 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 0.1151   0.3393  
##  choiceImage (Intercept) 0.0000   0.0000  
## Number of obs: 248, groups:  subject, 62; choiceImage, 8
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)      -0.09086    0.18365  -0.495   0.6208  
## conditionPartial  0.69553    0.28148   2.471   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## conditnPrtl -0.656

# Wald 95% CIs for the fixed effects (intercept and conditionPartial), selected
# with parm="beta_" rather than by hard-coded row positions.
confint(m, method="Wald", parm="beta_")

##                       2.5 %    97.5 %
## (Intercept)      -0.4508106 0.2690974
## conditionPartial  0.1438339 1.2472347

Non-parametric test (Wilcoxon)

Condition Comparison

## Condition comparison: two-sample Wilcoxon rank sum test on the per-subject
## proportion of ambiguous selections ("ambiguous" vs. "partially ambiguous").
wilcox.test(
  x = subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == "Experiment S1")$prop_ambig_selection,
  y = subset(subj_selection, ambiguity_condition == "partially ambiguous" & experiment_name == "Experiment S1")$prop_ambig_selection,
  conf.int = TRUE,
  conf.level = 0.95
)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name ==  and subset(subj_selection, ambiguity_condition == "partially ambiguous" &     "Experiment S1")$prop_ambig_selection and     experiment_name == "Experiment S1")$prop_ambig_selection
## W = 631.5, p-value = 0.02256
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  2.599459e-05 2.500091e-01
## sample estimates:
## difference in location 
##              0.2499557

Fully Ambiguous Condition

## Fully Ambiguous Condition: one-sample Wilcoxon signed rank test against 1/2
wilcox.test(
  x = subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition == "ambiguous")$prop_ambig_selection,
  mu = 1/2,
  conf.int = TRUE,
  conf.level = 0.95
)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition ==     "ambiguous")$prop_ambig_selection
## V = 140, p-value = 0.01534
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.5000486 0.8750296
## sample estimates:
## (pseudo)median 
##       0.750061

Partially Ambiguous Condition

## Partially Ambiguous Condition: one-sample Wilcoxon signed rank test against 1/2
wilcox.test(
  x = subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition == "partially ambiguous")$prop_ambig_selection,
  mu = 1/2,
  conf.int = TRUE,
  conf.level = 0.95
)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition ==     "partially ambiguous")$prop_ambig_selection
## V = 100.5, p-value = 0.5901
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
##  0.2500428 0.6249926
## sample estimates:
## (pseudo)median 
##      0.4999225

Test

Analysis of learners’ test performance.

## Per-subject test accuracy in Experiment S1, by test half
subj_test_half <- d %>%
  filter(trialType == "test", experiment_name == "Experiment S1") %>%
  group_by(subject, experiment_name, ambiguity_condition, testHalf) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )
## Same summary, additionally split by item type
subj_test_half_item <- d %>%
  filter(trialType == "test", experiment_name == "Experiment S1") %>%
  group_by(
    subject, experiment_name, ambiguity_condition, testHalf,
    targetType, targetIsAmbiguous, targetIsAmbiguousYN
  ) %>%
  summarize(
    N = n(),
    accuracy = mean(isRight, na.rm = TRUE)
  )

Plot

# Item-level summary for Experiment S1 via summarySEwithin(): mean accuracy
# per condition (between) x test half x item type (within), with subject as
# the id variable. lower_ci / upper_ci are the mean -/+ the returned ci, and
# testHalf_name is the facet label used in the plots.
subj_summary_item_testhalf_correctedCIs_s1 <- subj_test_half_item %>%
  filter(experiment_name == "Experiment S1") %>%
  summarySEwithin(
    "accuracy",
    betweenvars = c("experiment_name", "ambiguity_condition"),
    withinvars = c("testHalf", "targetIsAmbiguousYN"),
    idvar = "subject"
  ) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block", testHalf)
  )

# Experiment S1 - test accuracy by item type, one panel per test block.
# Bars: condition means with the CIs from
# subj_summary_item_testhalf_correctedCIs_s1. Points: the participant-level
# accuracies from subj_test_half_item. Dashed line: reference at y = 1/8.
#
# subj_test_half_item has no testHalf_name column, and ggplot2 repeats a layer
# in every facet when its data lacks the faceting variable. The label is
# therefore added to the point data so that each point is drawn only in the
# panel of its own test block.

# Panel A: Fully Ambiguous condition
p_expS1_fulltest <- ggplot(
  filter(subj_summary_item_testhalf_correctedCIs_s1, ambiguity_condition == "ambiguous"),
  aes(x = targetIsAmbiguousYN, y = accuracy, color = targetIsAmbiguousYN, fill = targetIsAmbiguousYN)
) +
  geom_bar(stat = "identity", size = 1.5, position = position_dodge(.53), width = 0.5, alpha = 0) +
  geom_point(
    data = mutate(
      filter(subj_test_half_item, experiment_name == "Experiment S1" & ambiguity_condition == "ambiguous"),
      testHalf_name = paste("Test Block", testHalf, sep = " ")
    ),
    aes(fill = targetIsAmbiguousYN),
    alpha = 0.5,
    position = position_jitterdodge(dodge.width = 0.53, jitter.width = 0.1, jitter.height = 0.02)
  ) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.0, size = 0.8, position = position_dodge(.53)) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "ambiguous"),
    values = c("#4DAF4A", "#E41A1C")
  ) +
  scale_x_discrete(
    name = "Item Type\n\nFully Ambiguous Condition",
    limits = c("no", "yes"),
    labels = c("disambig-\nuated", "fully\nambiguous")
  ) +
  ylab("Test Accuracy") +
  geom_hline(yintercept = 1 / 8, linetype = "dashed", size = 1.1) +
  theme_classic(base_size = 12) +
  theme(legend.position = "none", axis.text.x = element_text(size = 9)) +
  facet_wrap(~testHalf_name)

# Panel B: Partially Ambiguous condition
p_expS1_partialtest <- ggplot(
  filter(subj_summary_item_testhalf_correctedCIs_s1, ambiguity_condition == "partially ambiguous"),
  aes(x = targetIsAmbiguousYN, y = accuracy, color = targetIsAmbiguousYN, fill = targetIsAmbiguousYN)
) +
  geom_bar(stat = "identity", size = 1.5, position = position_dodge(.53), width = 0.5, alpha = 0) +
  geom_point(
    data = mutate(
      filter(subj_test_half_item, experiment_name == "Experiment S1" & ambiguity_condition == "partially ambiguous"),
      testHalf_name = paste("Test Block", testHalf, sep = " ")
    ),
    aes(fill = targetIsAmbiguousYN),
    alpha = 0.5,
    position = position_jitterdodge(dodge.width = 0.53, jitter.width = 0.1, jitter.height = 0.02)
  ) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.0, size = 0.8, position = position_dodge(.53)) +
  scale_color_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "partially\nambiguous"),
    values = c("#4DAF4A", "#377EB8")
  ) +
  scale_fill_manual(
    name = "Item Type",
    limits = c("no", "yes"),
    labels = c("disambiguated", "partially\nambiguous"),
    values = c("#4DAF4A", "#377EB8")
  ) +
  scale_x_discrete(
    name = "Item Type\n\nPartially Ambiguous Condition",
    limits = c("no", "yes"),
    labels = c("disambig-\nuated", "partially\nambiguous")
  ) +
  ylab("Test Accuracy") +
  geom_hline(yintercept = 1 / 8, linetype = "dashed", size = 1.1) +
  theme_classic(base_size = 12) +
  theme(legend.position = "none", axis.text.x = element_text(size = 9)) +
  facet_wrap(~testHalf_name)

# Combine both panels side by side.
plot_grid(p_expS1_fulltest, p_expS1_partialtest, labels = c("A", "B"), rel_widths = c(1, 1), label_size = 24, nrow = 1)

Descriptives

Overall Test Accuracy

Split by test half.

# Descriptives: mean test accuracy per condition and test half, from
# summarySEwithin() with subject as the id variable. The CI bounds are kept;
# the intermediate columns (accuracy_norm, se, sd, ci) are dropped from the
# printed table.
subj_summary_test_s1 <- subj_test_half %>%
  summarySEwithin(
    "accuracy",
    betweenvars = c("experiment_name", "ambiguity_condition"),
    withinvars = "testHalf",
    idvar = "subject"
  ) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block", testHalf)
  ) %>%
  select(-accuracy_norm, -se, -sd, -ci)

kable(subj_summary_test_s1)

experiment_name	ambiguity_condition	testHalf	N	accuracy	lower_ci	upper_ci	testHalf_name
Experiment S1	ambiguous	1	28	0.6517857	0.6118389	0.6917325	Test Block 1
Experiment S1	ambiguous	2	28	0.7276786	0.6877318	0.7676254	Test Block 2
Experiment S1	partially ambiguous	1	34	0.7683824	0.7248067	0.8119580	Test Block 1
Experiment S1	partially ambiguous	2	34	0.7757353	0.7321596	0.8193110	Test Block 2

Test Accuracy Split by Item

Split by test half.

# Descriptives split by item type: mean test accuracy per condition, test
# half and targetIsAmbiguousYN, from summarySEwithin() with subject as the id
# variable. Only the mean and the CI bounds are kept for the printed table.
subj_summary_test_item_s1 <- subj_test_half_item %>%
  summarySEwithin(
    "accuracy",
    betweenvars = c("experiment_name", "ambiguity_condition"),
    withinvars = c("testHalf", "targetIsAmbiguousYN"),
    idvar = "subject"
  ) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci,
    testHalf_name = paste("Test Block", testHalf)
  ) %>%
  select(-accuracy_norm, -se, -sd, -ci)

kable(subj_summary_test_item_s1)

experiment_name	ambiguity_condition	testHalf	targetIsAmbiguousYN	N	accuracy	lower_ci	upper_ci	testHalf_name
Experiment S1	ambiguous	1	no	28	0.8392857	0.7786705	0.8999009	Test Block 1
Experiment S1	ambiguous	1	yes	28	0.4642857	0.3776673	0.5509041	Test Block 1
Experiment S1	ambiguous	2	no	28	0.8571429	0.7783393	0.9359464	Test Block 2
Experiment S1	ambiguous	2	yes	28	0.5982143	0.5156617	0.6807669	Test Block 2
Experiment S1	partially ambiguous	1	no	34	0.7500000	0.6896160	0.8103840	Test Block 1
Experiment S1	partially ambiguous	1	yes	34	0.7867647	0.7272860	0.8462434	Test Block 1
Experiment S1	partially ambiguous	2	no	34	0.7941176	0.7390598	0.8491754	Test Block 2
Experiment S1	partially ambiguous	2	yes	34	0.7573529	0.6839887	0.8307171	Test Block 2

Logistic mixed-effects model

Three-Way Interaction: Effects of Item Type and Test Half across Conditions

# Centered contrast code for test half: block 1 = -0.5, block 2 = +0.5.
# Any other value of testHalf (including NA) is coded as NA.
d$testHalfC <- case_when(
  d$testHalf == 2 ~ 0.5,
  d$testHalf == 1 ~ -0.5,
  TRUE ~ NA_real_
)

# Three-way interaction: item type x condition x test half on test accuracy
# (Experiment S1 test trials, excluding the "non ambiguous" condition).
# Random effects: by-subject intercept and slopes for item type, test half and
# their interaction; by-targetImage intercept. The singular-fit check is
# switched off via check.conv.singular = "ignore".
# NOTE(review): targetIsAmbiguousC and conditionC are defined earlier in the
# file; their coding is not visible here.
m <- glmer(
  isRight ~ targetIsAmbiguousC * conditionC * testHalfC +
    (1 + targetIsAmbiguousC * testHalfC | subject) +
    (1 | targetImage),
  data = filter(
    d,
    (experiment_name == "Experiment S1") &
      trialType == "test" &
      ambiguity_condition != "non ambiguous"
  ),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguousC * conditionC * testHalfC + (1 +  
##     targetIsAmbiguousC * testHalfC | subject) + (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment S1") & trialType ==  
##     "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    846.0    939.1   -404.0    808.0      973 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0148 -0.2249  0.1640  0.3500  4.4299 
## 
## Random effects:
##  Groups      Name                         Variance Std.Dev. Corr          
##  subject     (Intercept)                  5.4970   2.3446                 
##              targetIsAmbiguousC           1.0299   1.0148   0.02          
##              testHalfC                    0.7031   0.8385   0.99 0.19     
##              targetIsAmbiguousC:testHalfC 0.5055   0.7110   0.35 0.94 0.50
##  targetImage (Intercept)                  0.1513   0.3889                 
## Number of obs: 992, groups:  subject, 62; targetImage, 8
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                               2.0641     0.3900   5.292 1.21e-07
## targetIsAmbiguousC                       -1.3301     0.3950  -3.368 0.000758
## conditionC                               -0.7664     0.6784  -1.130 0.258643
## testHalfC                                 0.8217     0.3614   2.273 0.023007
## targetIsAmbiguousC:conditionC            -2.7515     0.5694  -4.832 1.35e-06
## targetIsAmbiguousC:testHalfC             -0.1361     0.6891  -0.197 0.843444
## conditionC:testHalfC                      0.1909     0.5104   0.374 0.708427
## targetIsAmbiguousC:conditionC:testHalfC   0.7522     0.9304   0.808 0.418824
##                                            
## (Intercept)                             ***
## targetIsAmbiguousC                      ***
## conditionC                                 
## testHalfC                               *  
## targetIsAmbiguousC:conditionC           ***
## targetIsAmbiguousC:testHalfC               
## conditionC:testHalfC                       
## targetIsAmbiguousC:conditionC:testHalfC    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIAC cndtnC tstHlC trIAC:C tIAC:H cnC:HC
## trgtIsAmbgC -0.174                                           
## conditionC   0.028 -0.129                                    
## testHalfC    0.447 -0.127  0.000                             
## trgtIsAmC:C -0.108 -0.019 -0.003 -0.067                      
## trgtIsAC:HC -0.031  0.418 -0.020 -0.306 -0.038               
## cndtnC:tsHC -0.014 -0.097  0.476 -0.077  0.034  -0.236       
## trgIAC:C:HC -0.039 -0.045  0.059 -0.247  0.260  -0.093  0.086

# Wald 95% confidence intervals for the fixed effects only. parm = "beta_"
# selects the fixed-effect coefficients directly instead of relying on
# hard-coded row positions, which would silently shift if the random-effects
# structure of the model changed.
confint(m, method = "Wald", parm = "beta_")

##                                              2.5 %     97.5 %
## (Intercept)                              1.2996085  2.8285636
## targetIsAmbiguousC                      -2.1042260 -0.5560018
## conditionC                              -2.0960427  0.5633383
## testHalfC                                0.1132606  1.5301136
## targetIsAmbiguousC:conditionC           -3.8675112 -1.6355179
## targetIsAmbiguousC:testHalfC            -1.4867027  1.2145210
## conditionC:testHalfC                    -0.8095147  1.1912735
## targetIsAmbiguousC:conditionC:testHalfC -1.0713783  2.5757980

Specifically test the increase across test halves for ambiguous items in the Fully Ambiguous Condition

# Test the test-half increase specifically for ambiguous items in the Fully
# Ambiguous condition by re-coding item type so that targetIsAmbiguous == 1
# maps to 0 (the reference level) and targetIsAmbiguous == 0 maps to -1.
# Any other value (including NA) is coded as NA.
# NOTE(review): conditionFull is defined earlier in the file; presumably it
# codes the Fully Ambiguous condition as 0 - confirm.
d$targetIsAmbiguous_ambiguous <- case_when(
  d$targetIsAmbiguous == 1 ~ 0,
  d$targetIsAmbiguous == 0 ~ -1,
  TRUE ~ NA_real_
)

# Same model structure as the three-way interaction model, with the re-coded
# item type and conditionFull as predictors.
m <- glmer(
  isRight ~ targetIsAmbiguous_ambiguous * conditionFull * testHalfC +
    (1 + targetIsAmbiguous_ambiguous * testHalfC | subject) +
    (1 | targetImage),
  data = filter(
    d,
    (experiment_name == "Experiment S1") &
      trialType == "test" &
      ambiguity_condition != "non ambiguous"
  ),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
)
summary(m)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: isRight ~ targetIsAmbiguous_ambiguous * conditionFull * testHalfC +  
##     (1 + targetIsAmbiguous_ambiguous * testHalfC | subject) +  
##     (1 | targetImage)
##    Data: filter(d, (experiment_name == "Experiment S1") & trialType ==  
##     "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    846.0    939.1   -404.0    808.0      973 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0148 -0.2249  0.1640  0.3500  4.4299 
## 
## Random effects:
##  Groups      Name                                  Variance Std.Dev. Corr     
##  subject     (Intercept)                           5.8087   2.4101            
##              targetIsAmbiguous_ambiguous           1.0299   1.0148   0.23     
##              testHalfC                             1.1294   1.0627   0.97 0.46
##              targetIsAmbiguous_ambiguous:testHalfC 0.5055   0.7110   0.54 0.94
##  targetImage (Intercept)                           0.1513   0.3889            
##      
##      
##      
##      
##  0.73
##      
## Number of obs: 992, groups:  subject, 62; targetImage, 8
## 
## Fixed effects:
##                                                     Estimate Std. Error z value
## (Intercept)                                           0.3280     0.5258   0.624
## targetIsAmbiguous_ambiguous                          -2.7059     0.4825  -5.608
## conditionFull                                        -2.1421     0.7349  -2.915
## testHalfC                                             1.0371     0.4364   2.377
## targetIsAmbiguous_ambiguous:conditionFull            -2.7515     0.5694  -4.833
## targetIsAmbiguous_ambiguous:testHalfC                 0.2400     0.7947   0.302
## conditionFull:testHalfC                               0.5670     0.7195   0.788
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC   0.7522     0.9303   0.809
##                                                     Pr(>|z|)    
## (Intercept)                                          0.53276    
## targetIsAmbiguous_ambiguous                         2.05e-08 ***
## conditionFull                                        0.00356 ** 
## testHalfC                                            0.01747 *  
## targetIsAmbiguous_ambiguous:conditionFull           1.35e-06 ***
## targetIsAmbiguous_ambiguous:testHalfC                0.76263    
## conditionFull:testHalfC                              0.43070    
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC  0.41877    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trgIA_ cndtnF tstHlC trIA_:F tIA_:H cnF:HC
## trgtIsAmbg_ 0.236                                            
## conditinFll 0.640  0.123                                     
## testHalfC   0.498  0.161  0.300                              
## trgtIsAm_:F 0.181  0.575  0.384  0.073                       
## trgtIsA_:HC 0.133  0.346  0.062  0.481  0.119                
## cndtnFll:HC 0.232  0.033  0.422  0.467  0.192   0.217        
## trgIA_:F:HC 0.062  0.117  0.155  0.305  0.260   0.505  0.707

# Wald 95% confidence intervals for the fixed effects only. parm = "beta_"
# selects the fixed-effect coefficients directly instead of relying on
# hard-coded row positions, which would silently shift if the random-effects
# structure of the model changed.
confint(m, method = "Wald", parm = "beta_")

##                                                          2.5 %     97.5 %
## (Intercept)                                         -0.7025257  1.3584754
## targetIsAmbiguous_ambiguous                         -3.6515408 -1.7602106
## conditionFull                                       -3.5824604 -0.7017594
## testHalfC                                            0.1818284  1.8924395
## targetIsAmbiguous_ambiguous:conditionFull           -3.8674642 -1.6355665
## targetIsAmbiguous_ambiguous:testHalfC               -1.3175109  1.7975297
## conditionFull:testHalfC                             -0.8432513  1.9772172
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC -1.0711732  2.5755888

Relationship between sampling and test performance

Correlations in Test Half 1 and 2 between sampling preference and test accuracy

Fully Ambiguous Condition - Test Block 1

# Correlations, Experiment S1: proportion of ambiguous items selected vs.
# test accuracy. First attach prop_ambig_selection from subj_selection to the
# per-test-half accuracy summary and add the facet label used for plotting.
# NOTE(review): the join relies on dplyr's default keys (all columns shared
# by both tables) - confirm these are the intended keys.
subj_test_half <- subj_test_half %>%
  left_join(
    select(subj_selection, subject, prop_ambig_selection)
  ) %>%
  mutate(testHalf_name = paste("Test Block", testHalf))

# Fully Ambiguous - test block 1: Pearson correlation between test accuracy
# and proportion of ambiguous selections.
cor.test(
  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 1)$accuracy,
  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 1)$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==  and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==     1)$accuracy and     1)$prop_ambig_selection
## t = 2.5909, df = 26, p-value = 0.01549
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09617591 0.70664954
## sample estimates:
##       cor 
## 0.4529988

Fully Ambiguous Condition - Test Block 2

# Fully Ambiguous - test block 2: Pearson correlation between test accuracy
# and proportion of ambiguous selections.
cor.test(
  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 2)$accuracy,
  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 2)$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==  and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf ==     2)$accuracy and     2)$prop_ambig_selection
## t = 1.8848, df = 26, p-value = 0.07069
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03028924  0.63734347
## sample estimates:
##       cor 
## 0.3467055

Partially Ambiguous Condition - Test Block 1

# Partially Ambiguous - test block 1: Pearson correlation between test
# accuracy and proportion of ambiguous selections.
cor.test(
  subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 1)$accuracy,
  subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 1)$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half, ambiguity_condition == "partially ambiguous" &     testHalf == 1)$accuracy and     testHalf == 1)$prop_ambig_selection
## t = 0.55366, df = 32, p-value = 0.5837
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2489571  0.4216846
## sample estimates:
##        cor 
## 0.09740938

Partially Ambiguous Condition - Test Block 2

# Partially Ambiguous - test block 2: Pearson correlation between test
# accuracy and proportion of ambiguous selections.
cor.test(
  subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 2)$accuracy,
  subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 2)$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half, ambiguity_condition == "partially ambiguous" &     testHalf == 2)$accuracy and     testHalf == 2)$prop_ambig_selection
## t = -0.67859, df = 32, p-value = 0.5023
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4395666  0.2282537
## sample estimates:
##        cor 
## -0.1191055

Plot

# Experiment S1 - sampling preference vs. test accuracy, one panel per test
# block. Violins (with the median marked) are drawn per observed value of
# prop_ambig_selection; the black line is a linear fit with its shaded band.
# A single theme_classic(base_size = 12) is used because a complete theme
# replaces any theme added before it.

# Panel A: Fully Ambiguous condition
pS1_fullsampling_test <- ggplot(
  filter(subj_test_half, ambiguity_condition == "ambiguous"),
  aes(prop_ambig_selection, accuracy, color = ambiguity_condition)
) +
  geom_violin(aes(group = prop_ambig_selection), draw_quantiles = c(0.5)) +
  scale_color_manual(
    limits = c("ambiguous"),
    labels = c("fully ambiguous"),
    values = c("#E41A1C")
  ) +
  # Optional raw data points:
  # geom_point(position=position_jitter(width=.05,height=.0))+
  geom_smooth(method = "lm", color = "black", fill = "#4B0082", alpha = 0.3) +
  scale_x_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  # The upper limit of 1.08 leaves headroom above accuracy = 1.
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(0, 1.08)) +
  theme_classic(base_size = 12) +
  ylab("Test Accuracy") +
  theme(legend.position = "none") +
  xlab("Proportion of Ambiguous Selections\n\nFully Ambiguous Condition") +
  facet_wrap(~testHalf_name)

# Panel B: Partially Ambiguous condition
pS1_partialsampling_test <- ggplot(
  filter(subj_test_half, ambiguity_condition == "partially ambiguous"),
  aes(prop_ambig_selection, accuracy, color = ambiguity_condition)
) +
  geom_violin(aes(group = prop_ambig_selection), draw_quantiles = c(0.5)) +
  scale_color_manual(
    limits = c("partially ambiguous"),
    labels = c("partially ambiguous"),
    values = c("#377EB8")
  ) +
  # Optional raw data points:
  # geom_point(position=position_jitter(width=.05,height=.0))+
  geom_smooth(method = "lm", color = "black", fill = "#4B0082", alpha = 0.3) +
  scale_x_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  # The upper limit of 1.08 leaves headroom above accuracy = 1.
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(0, 1.08)) +
  theme_classic(base_size = 12) +
  ylab("Test Accuracy") +
  theme(legend.position = "none") +
  xlab("Proportion of Ambiguous Selections\n\nPartially Ambiguous Condition") +
  facet_wrap(~testHalf_name)

# Combine both panels side by side.
plot_grid(pS1_fullsampling_test, pS1_partialsampling_test, labels = c("A", "B"), rel_widths = c(1, 1), label_size = 24, nrow = 1)

Increase in test accuracy related to sampling preference

By including a test phase immediately preceding the Sampling Phase, we aimed to further understand the correlation between test accuracy and preference for selecting ambiguous items observed in Experiment 1. Specifically, do participants have higher test accuracy at the conclusion of the experiment because they preferentially selected ambiguous items, or do participants who are more successful at learning the object-label associations show a stronger preference for selecting ambiguous items?

To address this question, we correlated participants’ proportion of ambiguous selections with their increase in accuracy from Test Block 1 to Test Block 2. If participants’ ambiguous selections are driving higher accuracy, then participants who show a preference for sampling ambiguous items should show the largest increases in accuracy from Test Block 1 to Test Block 2. However, the proportion of ambiguous items selected was not significantly correlated with the increase in test accuracy in the Fully Ambiguous condition and was negatively correlated with it in the Partially Ambiguous condition, i.e., participants who were more likely to select the (partially) ambiguous items showed a smaller increase in test accuracy.

# Increase in accuracy from test block 1 to test block 2.
# Reshape to one row per participant with one accuracy column per test block
# (test_block_1, test_block_2). testHalf_name is dropped first because it
# varies with testHalf and would otherwise keep the two blocks on separate
# rows.
subj_test_half_wide <- subj_test_half %>%
  select(-testHalf_name) %>%
  pivot_wider(
    names_from = testHalf,
    values_from = accuracy,
    names_prefix = "test_block_"
  ) %>%
  mutate(accuracy_increase = test_block_2 - test_block_1)

# Scatter plot (jittered) of accuracy increase against sampling preference,
# with a linear fit, one panel per condition.
subj_test_half_wide %>%
  filter(experiment_name == "Experiment S1") %>%
  ggplot(aes(x = prop_ambig_selection, y = accuracy_increase)) +
  geom_jitter() +
  geom_smooth(method = "lm") +
  facet_wrap(~ambiguity_condition)

Fully Ambiguous Condition - Correlation between accuracy increase and sampling preference

# Fully Ambiguous - Pearson correlation between the block 1 -> block 2
# accuracy increase and the proportion of ambiguous selections.
cor.test(
  subset(subj_test_half_wide, ambiguity_condition == "ambiguous" & experiment_name == "Experiment S1")$accuracy_increase,
  subset(subj_test_half_wide, ambiguity_condition == "ambiguous" & experiment_name == "Experiment S1")$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half_wide, ambiguity_condition == "ambiguous" &  and subset(subj_test_half_wide, ambiguity_condition == "ambiguous" &     experiment_name == "Experiment S1")$accuracy_increase and     experiment_name == "Experiment S1")$prop_ambig_selection
## t = -0.3999, df = 26, p-value = 0.6925
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4384737  0.3037503
## sample estimates:
##         cor 
## -0.07818698

Partially Ambiguous Condition - Correlation between accuracy increase and sampling preference

# Partially Ambiguous - Pearson correlation between the block 1 -> block 2
# accuracy increase and the proportion of ambiguous selections.
cor.test(
  subset(subj_test_half_wide, ambiguity_condition == "partially ambiguous" & experiment_name == "Experiment S1")$accuracy_increase,
  subset(subj_test_half_wide, ambiguity_condition == "partially ambiguous" & experiment_name == "Experiment S1")$prop_ambig_selection
)

## 
##  Pearson's product-moment correlation
## 
## data:  subset(subj_test_half_wide, ambiguity_condition == "partially ambiguous" &  and subset(subj_test_half_wide, ambiguity_condition == "partially ambiguous" &     experiment_name == "Experiment S1")$accuracy_increase and     experiment_name == "Experiment S1")$prop_ambig_selection
## t = -2.1986, df = 32, p-value = 0.03525
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.62398746 -0.02746191
## sample estimates:
##        cor 
## -0.3622634

By Test Item (plot)

# By item type: attach prop_ambig_selection from subj_selection to the
# item-level accuracy summary.
# NOTE(review): the join relies on dplyr's default keys (all columns shared
# by both tables) - confirm these are the intended keys.
subj_test_half_item <- subj_test_half_item %>%
  left_join(
    select(subj_selection, subject, prop_ambig_selection)
  )

# One accuracy column per test block (test_block_1, test_block_2), plus the
# block 1 -> block 2 increase.
subj_test_half_item_wide <- subj_test_half_item %>%
  pivot_wider(
    names_from = testHalf,
    values_from = accuracy,
    names_prefix = "test_block_"
  ) %>%
  mutate(accuracy_increase = test_block_2 - test_block_1)

# Accuracy increase against sampling preference, with a linear fit, one panel
# per condition x item type.
subj_test_half_item_wide %>%
  ggplot(aes(x = prop_ambig_selection, y = accuracy_increase)) +
  geom_jitter() +
  geom_smooth(method = "lm") +
  facet_wrap(~ambiguity_condition + targetIsAmbiguousYN)