Analysis walkthrough for the paper
Zettersten, M., & Saffran, J. (accepted). Sampling to learn words: Adults and children sample words that reduce referential ambiguity. Developmental Science.
For more background information and the underlying data, visit the OSF repository for this report: OSF site
See crossact_codebook for information about individual columns.
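The code below assumes a standard R session with several packages attached (not shown in this report); the package names here are our best guess based on the functions used, and data_path is assumed to point to the data file downloaded from the OSF repository.
#assumed setup (a sketch; the exact packages/versions are not listed in this report)
library(lme4)        #glmer() mixed-effects models
library(AICcmodavg)  #predictSE() model predictions with standard errors
library(Rmisc)       #summarySEwithin() within-subject CIs (loads plyr; attach before dplyr)
library(dplyr)       #data wrangling (%>%, group_by, summarize)
library(ggplot2)     #plotting
library(ggstance)    #geom_violinh() horizontal violins
library(knitr)       #kable() tables
library(DT)          #datatable() interactive tables
data_path <- "crossact_data.csv"  #hypothetical file name; see the OSF repository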
d <- read.csv(data_path)
Overview of exclusions in each experiment.
##exclusions
exclusions <- d %>%
group_by(subject,experiment_name) %>%
summarize(Exclude=Exclude[1]) %>%
ungroup() %>%
group_by(experiment_name) %>%
summarize(N=n(),num_exclusions=sum(Exclude!="N"))
kable(exclusions)
experiment_name | N | num_exclusions |
---|---|---|
Experiment 1 | 31 | 3 |
Experiment 2 | 40 | 2 |
Experiment 3 | 58 | 2 |
Experiment S1 | 62 | 0 |
#remove excluded participants
d <- d %>%
filter(Exclude=="N")
Overview of the demographic characteristics of each sample.
##demographics (exclusions filtered)
demographics <- d %>%
filter(Exclude=="N") %>%
group_by(subject,age_group,experiment_name) %>%
summarize(Gender=Gender[1],Age=Age[1],L1_english=L1_english[1],languages_besides_english_yn=languages_besides_english_yn[1],L1=L1[1],L1percent=L1percent[1]) %>%
ungroup() %>%
group_by(age_group,experiment_name) %>%
summarize(
N=n(),
gender_f=sum(Gender=="female"),
mean_age=round(mean(Age,na.rm=T),2),
sd_age=round(sd(Age,na.rm=T),2),
min_age=round(min(Age,na.rm=T),2),
max_age=round(max(Age,na.rm=T),2),
native_english=ifelse(is.na(sum(L1_english=="English")),sum(L1=="English",na.rm=T),sum(L1_english=="English",na.rm=T)),
languages_besides_english=ifelse(age_group[1]=="adults",NA,sum(languages_besides_english_yn=="Yes",na.rm=T)),
monolingual=ifelse(age_group[1]=="adults",NA,sum(L1percent>=90,na.rm=T))
)
demographics %>%
filter(experiment_name=="Experiment 1") %>%
kable()
age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual |
---|---|---|---|---|---|---|---|---|---|---|
adults | Experiment 1 | 28 | 8 | 31.39 | 7.25 | 19 | 48 | 28 | NA | NA |
demographics %>%
filter(experiment_name=="Experiment 2") %>%
kable()
age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual |
---|---|---|---|---|---|---|---|---|---|---|
kids | Experiment 2 | 38 | 19 | 5.9 | 1.19 | 4.1 | 8.12 | 38 | 6 | 32 |
d %>%
filter(Exclude=="N"&experiment_name=="Experiment 2") %>%
group_by(subject) %>%
summarize(hispanic=hispanic[1],ethnicity=ethnicity[1]) %>%
ungroup() %>%
group_by(hispanic,ethnicity) %>%
summarize(count=n()) %>%
kable()
hispanic | ethnicity | count |
---|---|---|
No | Asian | 1 |
No | Asian,White | 2 |
No | Black or African American,White | 1 |
No | Other | 1 |
No | White | 30 |
Yes | Other | 1 |
Yes | White | 1 |
NA | White | 1 |
Note that we are currently missing some demographic information (ethnicity & language) for 6 of the 56 participants, due to experimenter error.
demographics %>%
filter(experiment_name=="Experiment 3") %>%
kable()
age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual |
---|---|---|---|---|---|---|---|---|---|---|
kids | Experiment 3 | 56 | 33 | 5.53 | 1.18 | 3.29 | 7.88 | 50 | 17 | 38 |
d %>%
filter(Exclude=="N"&experiment_name=="Experiment 3") %>%
group_by(subject) %>%
summarize(hispanic=hispanic[1],ethnicity=ethnicity[1]) %>%
ungroup() %>%
group_by(hispanic,ethnicity) %>%
summarize(count=n()) %>%
kable()
hispanic | ethnicity | count |
---|---|---|
No | Asian | 8 |
No | Asian,Black or African American | 1 |
No | Asian,White | 1 |
No | Black or African American | 1 |
No | Black or African American,White | 1 |
No | Black or African American,White,Other | 1 |
No | Other | 1 |
No | White | 33 |
Yes | Other | 1 |
Yes | White | 1 |
NA | White | 1 |
NA | NA | 6 |
demographics %>%
filter(experiment_name=="Experiment S1") %>%
kable()
age_group | experiment_name | N | gender_f | mean_age | sd_age | min_age | max_age | native_english | languages_besides_english | monolingual |
---|---|---|---|---|---|---|---|---|---|---|
adults | Experiment S1 | 62 | 27 | 19.13 | 1.01 | 18 | 22 | 56 | NA | NA |
First, we summarize sampling and test behavior by participant and store these objects for later plotting and analysis.
#Summarize sampling and test behavior by subject
#used in later analyses and plotting
#selections by subject
subj_selection <- d %>%
filter(trialType=="selection") %>%
group_by(experiment_name,ambiguity_condition,subject) %>%
summarize(
N=n(),
prop_ambig_selection=sum(selectionType!="low")/N,
num_ambig_selection=sum(selectionType!="low"))
subj_selection %>%
arrange(experiment_name,ambiguity_condition,subject) %>%
DT::datatable()
#test performance by subject
subj_test <- d %>%
filter(trialType=="test") %>%
group_by(experiment_name,ambiguity_condition,subject) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
subj_test %>%
arrange(experiment_name,ambiguity_condition,subject) %>%
DT::datatable()
#test performance split by item type (ambiguous vs. non-ambiguous)
subj_test_item <- d %>%
filter(trialType=="test") %>%
group_by(experiment_name,ambiguity_condition,subject,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
subj_test_item %>%
arrange(experiment_name,ambiguity_condition,subject) %>%
DT::datatable()
Analysis of learners’ sampling preferences.
The plot is based on estimates from the logistic mixed-effects model reported below.
#create data frame with model predictions
m <- glmer(isAmbiguous~(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
model_pred <- data.frame(ambiguity_condition="ambiguous")
pY <- predictSE(m,model_pred, type="response")
model_pred <- model_pred %>%
mutate(
experiment_name= "Experiment 1",
prop_ambiguous = pY$fit,
prop_ambiguous_lower_ci = pY$fit - 1.96*pY$se.fit,
prop_ambiguous_upper_ci = pY$fit + 1.96*pY$se.fit)
#create plot
p_exp1_sampling <- ggplot(subset(model_pred,ambiguity_condition=="ambiguous"),aes(x=experiment_name,y=prop_ambiguous,color=ambiguity_condition,fill=ambiguity_condition))+
geom_bar(stat="identity",size=2.5,fill="white",width=0.5)+
geom_dotplot(data=subset(subj_selection,experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous"), aes(y=prop_ambig_selection),binaxis="y",stackdir="center",alpha=0.5,dotsize=0.6)+
geom_errorbar(aes(ymin=prop_ambiguous_lower_ci,ymax=prop_ambiguous_upper_ci),width=0,size=1.2)+
ylab("Probability of \nAmbiguous Selection")+
geom_hline(yintercept=0.5,linetype="dotted")+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
scale_color_brewer(palette="Set1")+
scale_fill_brewer(palette="Set1")+
theme_classic(base_size=24)+
theme(legend.position="none")+
scale_x_discrete(name="Experiment 1")+
theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
p_exp1_sampling
#descriptives
subj_summary_1 <- subj_selection %>%
filter(experiment_name=="Experiment 1") %>%
group_by(ambiguity_condition) %>%
summarize(
N=n(),
prop_ambiguous=mean(prop_ambig_selection),
ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
) %>%
select(-ci_ambiguous)
kable(subj_summary_1)
ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci |
---|---|---|---|---|
ambiguous | 28 | 0.625 | 0.5084928 | 0.7415072 |
m <- glmer(isAmbiguous~(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ (1 | subject) + (1 | choiceImage)
## Data:
## subset(d, experiment_name == "Experiment 1" & trialType == "selection" &
## ambiguity_condition == "ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 150.9 159.1 -72.5 144.9 109
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4998 -0.9396 0.5155 0.6668 1.0642
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.845 0.9193
## choiceImage (Intercept) 0.000 0.0000
## Number of obs: 112, groups: subject, 28; choiceImage, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6155 0.2845 2.163 0.0305 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(m,method="Wald")[3,]
## 2.5 % 97.5 %
## 0.05777627 1.17315329
Note that we ignore a singular fit warning here. It appears to be caused by the inclusion of the by-item random intercept; however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.
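A sketch of that simplified model (not shown in the report output):
#robustness check: same model without the by-stimulus random intercept
m_simple <- glmer(isAmbiguous~(1|subject),data=subset(d,experiment_name=="Experiment 1"&trialType=="selection"&ambiguity_condition=="ambiguous"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m_simple)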
wilcox.test(filter(subj_selection, experiment_name=="Experiment 1"&ambiguity_condition=="ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: filter(subj_selection, experiment_name == "Experiment 1" & ambiguity_condition == "ambiguous")$prop_ambig_selection
## V = 151.5, p-value = 0.02026
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
## 0.5000775 0.8750528
## sample estimates:
## (pseudo)median
## 0.625024
Analysis of learners’ test performance.
## split by item, within-subjects corrected CIs
subj_summary_test_item_1 <- summarySEwithin(
filter(
subj_test_item,
experiment_name=="Experiment 1"),
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("targetIsAmbiguousYN"),
idvar="subject") %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci
) %>%
select(-accuracy_norm,-sd,-se,-ci)
## by-item test plot
ggplot(subj_summary_test_item_1,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_point(data=filter(subj_test_item,experiment_name=="Experiment 1"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type",
limits=c("no","yes"),
labels=c("disambiguated","fully\nambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
theme_classic(base_size=16)+
theme(legend.position="none", axis.text.x=element_text(size=15))
## descriptives
subj_summary_test_1 <- subj_test %>%
filter(experiment_name=="Experiment 1") %>%
group_by(experiment_name,ambiguity_condition) %>%
summarize(
N=n(),
prop_correct=mean(accuracy,na.rm=T),
ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
prop_correct_lower_ci=prop_correct-ci,
prop_correct_upper_ci=prop_correct+ci
) %>%
select(-ci)
kable(subj_summary_test_1)
experiment_name | ambiguity_condition | N | prop_correct | prop_correct_lower_ci | prop_correct_upper_ci |
---|---|---|---|---|---|
Experiment 1 | ambiguous | 28 | 0.65625 | 0.5265768 | 0.7859232 |
kable(subj_summary_test_item_1)
experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci |
---|---|---|---|---|---|---|
Experiment 1 | ambiguous | no | 28 | 0.6785714 | 0.5909601 | 0.7661827 |
Experiment 1 | ambiguous | yes | 28 | 0.6339286 | 0.5463173 | 0.7215399 |
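The chance-level test below fixes the model's baseline at chance (1/8) via an offset on the log-odds scale. logit() is not a base R function; we assume a standard definition (e.g., boot::logit or gtools::logit):
#assumed helper, equivalent to boot::logit / gtools::logit
logit <- function(p) log(p/(1-p))
logit(1/8) #chance baseline on the log-odds scale, approx. -1.95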
## testing overall accuracy against chance
d$offset.125 <- 1/8
m <- glmer(isRight~offset(logit(offset.125))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 1"&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## isRight ~ offset(logit(offset.125)) + (1 | subject) + (1 | targetImage)
## Data: filter(d, experiment_name == "Experiment 1" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 236.0 246.3 -115.0 230.0 221
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1422 -0.4954 0.2084 0.5398 2.0369
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 5.30551 2.3034
## targetImage (Intercept) 0.09789 0.3129
## Number of obs: 224, groups: subject, 28; targetImage, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.1893 0.5097 6.258 3.91e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## testing difference between items
d$targetIsAmbiguousC <- ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==1,0.5,
ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==0,-0.5,NA))
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment 1")&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |
## subject) + (1 | targetImage)
## Data: filter(d, (experiment_name == "Experiment 1") & trialType ==
## "test")
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 233.5 253.9 -110.7 221.5 218
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0032 -0.4774 0.0571 0.3672 1.7067
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 9.14612 3.024
## targetIsAmbiguousC 8.80532 2.967 -0.98
## targetImage (Intercept) 0.01905 0.138
## Number of obs: 224, groups: subject, 28; targetImage, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6280 0.7177 2.268 0.0233 *
## targetIsAmbiguousC -1.5652 0.9516 -1.645 0.1000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## trgtIsAmbgC -0.823
confint(m, method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) 0.2213282 3.0345876
## targetIsAmbiguousC -3.4303089 0.2999958
#join sampling and test
subj_selection <- subj_selection %>%
left_join(subj_test,by=c("subject","experiment_name","ambiguity_condition"))
#correlation between preference for sampling ambiguous items and test performance
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 1")$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data: subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == "Experiment 1")$accuracy and "Experiment 1")$prop_ambig_selection
## t = 3.6465, df = 26, p-value = 0.001167
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2664476 0.7845213
## sample estimates:
## cor
## 0.5817003
Plot of the correlation between sampling preference and test accuracy.
p_exp1_sampling_test <- ggplot(filter(subj_selection,experiment_name=="Experiment 1"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
scale_color_manual(limits=c("ambiguous"),
values=c("#E41A1C"))+
geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
theme_classic()+
scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
theme_classic(base_size=24)+
ylab("Test Accuracy")+
theme(legend.position="none")+
xlab("Probability of \nAmbiguous Selection")
p_exp1_sampling_test
Analysis of learners’ sampling preferences.
ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
scale_fill_brewer(palette="Set1",direction=-1)+
scale_color_brewer(palette="Set1",direction=-1)+
geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
geom_vline(xintercept=4/3,linetype="dashed")+ #expected number of ambiguous selections under chance (4 selection trials x 1/3)
theme(axis.title = element_text(size=20),
axis.text = element_text(size=16),
legend.position="none")+
ylab("Number of subjects")+
xlab("Number of ambiguous selections")+
#geom_density(aes(group=1, y=..count../1.3))+
scale_x_continuous(breaks=c(0,1,2,3,4), limits=c(-0.5,4))+
scale_y_continuous(breaks=c(0,5,10,15,20,25,30), limits=c(0,20))
#descriptives
subj_summary_2 <- subj_selection %>%
filter(experiment_name=="Experiment 2") %>%
group_by(ambiguity_condition) %>%
summarize(
N=n(),
prop_ambiguous=mean(prop_ambig_selection),
ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
) %>%
select(-ci_ambiguous)
kable(subj_summary_2)
ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci |
---|---|---|---|---|
ambiguous | 38 | 0.3289474 | 0.2712987 | 0.386596 |
d$offset.33 <- 1/3
m <- glmer(isAmbiguous~offset(logit(offset.33))+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ offset(logit(offset.33)) + (1 | subject) + (1 |
## choiceImage)
## Data:
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 198.6 207.6 -96.3 192.6 149
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7001 -0.7001 -0.7001 1.4283 1.4283
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0 0
## choiceImage (Intercept) 0 0
## Number of obs: 152, groups: subject, 38; choiceImage, 6
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0198 0.1726 -0.115 0.909
confint(m, method="Wald")[3,]
## 2.5 % 97.5 %
## -0.3581671 0.3185617
Note that we ignore a singular fit warning here. It appears to be caused by the inclusion of the by-item random intercept; however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.
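A sketch of that simplified model for Experiment 2:
#robustness check: offset model without the by-stimulus random intercept
m_simple <- glmer(isAmbiguous~offset(logit(offset.33))+(1|subject),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m_simple)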
wilcox.test(filter(subj_selection,experiment_name=="Experiment 2")$prop_ambig_selection,mu=1/3, conf.int=T, conf.level=0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: filter(subj_selection, experiment_name == "Experiment 2")$prop_ambig_selection
## V = 409, p-value = 0.5734
## alternative hypothesis: true location is not equal to 0.3333333
## 95 percent confidence interval:
## 0.2500390 0.3750141
## sample estimates:
## (pseudo)median
## 0.3749301
#predict ambiguous selections from age
m <- glmer(isAmbiguous~Age+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
## Data:
## subset(d, experiment_name == "Experiment 2" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 200.6 212.7 -96.3 192.6 148
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7034 -0.7006 -0.6989 1.4260 1.4334
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0 0
## choiceImage (Intercept) 0 0
## Number of obs: 152, groups: subject, 38; choiceImage, 6
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.737329 0.881389 -0.837 0.403
## Age 0.004131 0.146437 0.028 0.977
##
## Correlation of Fixed Effects:
## (Intr)
## Age -0.981
confint(m, method="Wald")[3:4,]
## 2.5 % 97.5 %
## (Intercept) -2.4648196 0.9901613
## Age -0.2828792 0.2911415
#plot model predictions
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=T),max(subset(d,experiment_name=="Experiment 2"&trialType=="selection")$Age,na.rm=T),by=0.1))
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
geom_violinh(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=F)+
geom_jitter(data=subset(d,experiment_name=="Experiment 2"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
geom_hline(yintercept=1/3,linetype="dotted")+
theme_classic(base_size=16)+
xlab("Age (in years)")+
scale_x_continuous(breaks=c(3,4,5,6,7,8))+
ylab("Proportion of ambiguous selections")+
theme(axis.title = element_text(size=20),
axis.text = element_text(size=16))
#summarize across participants by item type
subj_summary_item_2 <- summarySEwithin(
filter(
subj_test_item,
(experiment_name=="Experiment 2")),
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("targetIsAmbiguousYN"),
idvar="subject") %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci
) %>%
select(-accuracy_norm,-sd,-se,-ci)
## create plot
ggplot(subj_summary_item_2,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_dotplot(data=filter(subj_test_item,experiment_name=="Experiment 2"),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/6,linetype="dashed",size=1.1)+
theme_classic(base_size=16)+
theme(legend.position="none", axis.text.x=element_text(size=15))
##descriptives
#overall
subj_summary_test_2 <- subj_test %>%
filter(experiment_name=="Experiment 2") %>%
group_by(experiment_name,ambiguity_condition) %>%
summarize(
N=n(),
prop_correct=mean(accuracy,na.rm=T),
ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
prop_correct_lower_ci=prop_correct-ci,
prop_correct_upper_ci=prop_correct+ci
) %>%
select(-ci)
kable(subj_summary_test_2)
experiment_name | ambiguity_condition | N | prop_correct | prop_correct_lower_ci | prop_correct_upper_ci |
---|---|---|---|---|---|
Experiment 2 | ambiguous | 38 | 0.3859649 | 0.3073429 | 0.4645869 |
#by item type
subj_summary_item_2 %>%
kable()
experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci |
---|---|---|---|---|---|---|
Experiment 2 | ambiguous | no | 38 | 0.3355263 | 0.2484583 | 0.4225943 |
Experiment 2 | ambiguous | yes | 38 | 0.4868421 | 0.3997741 | 0.5739101 |
## testing overall accuracy against chance
d$offset.17 <- 1/6
m <- glmer(isRight~offset(logit(offset.17))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 2"&trialType=="test"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ offset(logit(offset.17)) + (1 | subject) + (1 | targetImage)
## Data: filter(d, experiment_name == "Experiment 2" & trialType == "test")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 306.6 316.9 -150.3 300.6 224
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0598 -0.7565 -0.6739 1.0550 1.4839
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.3354 0.5791
## targetImage (Intercept) 0.0000 0.0000
## Number of obs: 227, groups: subject, 38; targetImage, 6
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1131 0.1723 6.461 1.04e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## accuracy by item type
##logistic mixed-effects model
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 2"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |
## subject) + (1 | targetImage)
## Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 307.5 328.0 -147.7 295.5 221
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3639 -0.7441 -0.5889 1.0369 1.6980
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.3840684 0.61973
## targetIsAmbiguousC 0.0001614 0.01271 1.00
## targetImage (Intercept) 0.0000000 0.00000
## Number of obs: 227, groups: subject, 38; targetImage, 6
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3970 0.1824 -2.176 0.0296 *
## targetIsAmbiguousC 0.6795 0.3046 2.231 0.0257 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## trgtIsAmbgC 0.209
confint(m, method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) -0.75450961 -0.03940061
## targetIsAmbiguousC 0.08253043 1.27644793
Which items (ambiguous vs. disambiguated) do participants choose on each test trial type? Here, we asked what proportion of the time participants selected one of the two ambiguous items when the target label referred to an ambiguous vs. a disambiguated item (regardless of accuracy).
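The dashed reference line in the plot below sits at 1/3: assuming two ambiguous alternatives (coded via High1 and High2 below) among the six test choices, this is the chance probability of selecting an ambiguous item.
#chance of choosing an ambiguous item at test (assumption: 2 ambiguous items among 6 alternatives)
2/6 # = 1/3, the dashed reference line in the plot below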
## by choice type
# Which items (ambiguous vs. disambiguated) do participants choose on each test trial type?
d$testChoiceType <- ifelse(d$trialType=="test"&(as.character(d$choiceImage)==d$High1|as.character(d$choiceImage)==d$High2),"ambiguous",
ifelse(d$trialType=="test","disambiguated",NA))
#summarize choice tendency by participant
subj_test_choiceType_2 <- d %>%
filter(trialType=="test"&experiment_name == "Experiment 2") %>%
group_by(subject,experiment_name,ambiguity_condition,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
summarize(
N=n(),
ambiguous_choice=mean(testChoiceType=="ambiguous",na.rm=T))
#summarize across participants
subj_summary_choiceType_2 <- summarySEwithin(
subj_test_choiceType_2,
"ambiguous_choice",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("targetIsAmbiguousYN"),
idvar="subject") %>%
mutate(
lower_ci = ambiguous_choice - ci,
upper_ci = ambiguous_choice + ci
) %>%
select(-ambiguous_choice_norm,-sd,-se,-ci)
kable(subj_summary_choiceType_2)
experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | ambiguous_choice | lower_ci | upper_ci |
---|---|---|---|---|---|---|
Experiment 2 | ambiguous | no | 38 | 0.1842105 | 0.0798448 | 0.2885762 |
Experiment 2 | ambiguous | yes | 38 | 0.6184211 | 0.5140554 | 0.7227867 |
##plot
ggplot(subj_summary_choiceType_2,aes(x=targetIsAmbiguousYN,y=ambiguous_choice,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_dotplot(data=subj_test_choiceType_2,alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"))+
ylab("Proportion Ambiguous Items Selected")+
geom_hline(yintercept=1/3,linetype="dashed",size=1.1)+
theme_classic(base_size=16)+
theme(legend.position="none", axis.text.x=element_text(size=15))
#summarize test accuracy by choice
subj_test_choice_2 <- d %>%
filter(trialType=="test"&experiment_name %in% c("Experiment 2")) %>%
group_by(subject,experiment_name,ambiguity_condition,chosen) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
subj_summary_choice_2 <- summarySEwithin(
subj_test_choice_2,
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("chosen"),
idvar="subject",
na.rm=T) %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
)
subj_summary_choice_2 %>%
select(-accuracy_norm, -sd,-se,-ci) %>%
kable()
experiment_name | ambiguity_condition | chosen | N | accuracy | lower_ci | upper_ci | chosen_factor |
---|---|---|---|---|---|---|---|
Experiment 2 | ambiguous | 0 | 38 | 0.2675439 | 0.1621150 | 0.3729727 | NOT SAMPLED |
Experiment 2 | ambiguous | 1 | 38 | 0.4517544 | 0.3463255 | 0.5571833 | SAMPLED |
subj_test_item_choice_2 <- d %>%
filter(trialType=="test"&experiment_name %in% c("Experiment 2")) %>%
group_by(subject,experiment_name,ambiguity_condition,chosen,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
subj_summary_item_choice_2 <- summarySEwithin(
subj_test_item_choice_2,
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("chosen","targetIsAmbiguousYN"),
idvar="subject",
na.rm=T) %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
)
ggplot(subj_summary_item_choice_2,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
theme_classic(base_size=16)+
theme(legend.position="none", axis.text.x=element_text(size=15))+
facet_wrap(~chosen_factor)
d$chosenC <- ifelse(!is.na(d$chosen)&d$chosen==0,-0.5,
ifelse(!is.na(d$chosen)&d$chosen==1,0.5,NA))
m <- glmer(isRight~targetIsAmbiguousC*chosenC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 2"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |
## subject) + (1 | targetImage)
## Data: subset(d, trialType == "test" & experiment_name == "Experiment 2")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 302.2 329.6 -143.1 286.2 219
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5720 -0.7516 -0.4534 0.9339 2.4960
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.44890 0.6700
## targetIsAmbiguousC 0.03932 0.1983 -1.00
## targetImage (Intercept) 0.00000 0.0000
## Number of obs: 227, groups: subject, 38; targetImage, 6
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5662 0.2014 -2.811 0.00494 **
## targetIsAmbiguousC 0.8276 0.3441 2.405 0.01618 *
## chosenC 0.8896 0.3349 2.656 0.00790 **
## targetIsAmbiguousC:chosenC -0.4927 0.6997 -0.704 0.48135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgIAC chosnC
## trgtIsAmbgC 0.048
## chosenC -0.330 0.037
## trgtIsAmC:C 0.042 -0.412 0.158
confint(m, method="Wald")[5:8,]
## 2.5 % 97.5 %
## (Intercept) -0.9610699 -0.171419
## targetIsAmbiguousC 0.1530674 1.502058
## chosenC 0.2332100 1.545923
## targetIsAmbiguousC:chosenC -1.8640073 0.878679
## preference for sampling ambiguous items and test performance
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 2")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous"&experiment_name=="Experiment 2")$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data: subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == and subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == "Experiment 2")$accuracy and "Experiment 2")$prop_ambig_selection
## t = 1.3557, df = 36, p-value = 0.1836
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1068138 0.5045310
## sample estimates:
## cor
## 0.2203953
##plot
ggplot(filter(subj_selection,experiment_name=="Experiment 2"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
scale_color_manual(limits=c("ambiguous"),
values=c("#E41A1C"))+
geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
theme_classic()+
scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
theme_classic(base_size=24)+
ylab("Test Accuracy")+
theme(legend.position="none")+
xlab("Probability of \nAmbiguous Selection")
Analysis of learners’ sampling preferences.
ggplot(filter(subj_selection,experiment_name=="Experiment 3"),aes(x=num_ambig_selection,fill=as.factor(num_ambig_selection),color=as.factor(num_ambig_selection)))+
scale_fill_brewer(palette="Set1",direction=-1)+
scale_color_brewer(palette="Set1",direction=-1)+
geom_bar(stat="count",size=1.5,alpha=0.2,width=0.5)+
theme(axis.title = element_text(size=20),
axis.text = element_text(size=16),
legend.position="none")+
ylab("Number of subjects")+
xlab("Number of ambiguous selections")+
scale_x_continuous(breaks=c(0,1,2))+
scale_y_continuous(breaks=c(0,5,10,15,20,25,30))
#descriptives
subj_summary_3 <- subj_selection %>%
filter(experiment_name=="Experiment 3") %>%
group_by(ambiguity_condition) %>%
summarize(
N=n(),
prop_ambiguous=mean(prop_ambig_selection),
ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
) %>%
select(-ci_ambiguous)
kable(subj_summary_3)
ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci |
---|---|---|---|---|
ambiguous_me | 56 | 0.6339286 | 0.5436849 | 0.7241723 |
m <- glmer(isAmbiguous ~ 1+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ 1 + (1 | subject) + (1 | choiceImage)
## Data:
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 153.1 161.3 -73.6 147.1 109
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3189 -1.3126 0.7582 0.7601 0.7618
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 2.663e-15 5.160e-08
## choiceImage (Intercept) 1.529e-03 3.911e-02
## Number of obs: 112, groups: subject, 56; choiceImage, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5498 0.2031 2.707 0.00679 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(m,method="Wald")[3,]
## 2.5 % 97.5 %
## 0.1517166 0.9479760
Note that we ignore a singular fit warning here. It appears to be caused by the inclusion of the by-item random intercept; however, it does not appear to adversely affect the model fit, and a simplified model with the by-stimulus random intercept removed yields virtually identical results.
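A sketch of the corresponding simplified model for Experiment 3:
#robustness check: intercept-only model without the by-stimulus random intercept
m_simple <- glmer(isAmbiguous~1+(1|subject),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m_simple)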
wilcox.test(filter(subj_selection,experiment_name=="Experiment 3")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: filter(subj_selection, experiment_name == "Experiment 3")$prop_ambig_selection
## V = 330, p-value = 0.005553
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
## 0.5000378 1.0000000
## sample estimates:
## (pseudo)median
## 0.9999326
#predict ambiguous selections from age
m <- glmer(isAmbiguous~Age+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ Age + (1 | subject) + (1 | choiceImage)
## Data:
## subset(d, experiment_name == "Experiment 3" & trialType == "selection")
##
## AIC BIC logLik deviance df.resid
## 148.4 159.3 -70.2 140.4 108
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2833 -1.0456 0.5800 0.7368 1.2408
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 1.081e-08 0.000104
## choiceImage (Intercept) 2.176e-03 0.046648
## Number of obs: 112, groups: subject, 56; choiceImage, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9284 1.0046 -1.919 0.0549 .
## Age 0.4551 0.1837 2.478 0.0132 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Age -0.978
confint(m, method="Wald")[3:4,]
## 2.5 % 97.5 %
## (Intercept) -3.89741195 0.04068293
## Age 0.09510293 0.81512510
#plot model predictions
pX <- data.frame(Age=seq(min(subset(d,experiment_name=="Experiment 3"&trialType=="selection")$Age,na.rm=T),max(subset(d,experiment_name=="Experiment 3"&trialType=="selection")$Age,na.rm=T),by=0.1))
pY <- predictSE(m,pX,re.form=NA,type="response")
pX$isAmbiguous <- pY$fit
pX$YLower <- pY$fit-pY$se.fit
pX$YUpper <- pY$fit+pY$se.fit
ggplot(pX,aes(Age,isAmbiguous))+
geom_violinh(data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),scale="count",width=0.1, trim=F)+
geom_jitter(data=subset(d,experiment_name=="Experiment 3"&trialType=="selection"),aes(y=isAmbiguous,group=isAmbiguous),height=0.01)+
geom_smooth(aes(ymin=YLower,ymax=YUpper),stat="identity",color="#E41A1C",fill="#E41A1C")+
geom_hline(yintercept=0.5,linetype="dotted")+
theme_classic(base_size=16)+
xlab("Age (in years)")+
scale_x_continuous(breaks=c(3,4,5,6,7,8))+
ylab("Proportion of ambiguous selections")+
theme(axis.title = element_text(size=20),
axis.text = element_text(size=16))
Analysis of learners’ test accuracy.
##plot by item type
#by item type
subj_summary_item_3 <- summarySEwithin(
filter(
subj_test_item,
(experiment_name=="Experiment 3")),
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("targetIsAmbiguousYN"),
idvar="subject") %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci
)
ggplot(subj_summary_item_3,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_dotplot(data=filter(subj_test_item,experiment_name=="Experiment 3"),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("mutual exclusivity","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("mutual exclusivity","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type",
limits=c("no","yes"),
labels=c("mutual exclusivity","ambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
theme_classic(base_size=16)+
theme(legend.position="none", axis.text.x=element_text(size=15))
## descriptives
subj_summary_test_3 <- subj_test %>%
filter(experiment_name=="Experiment 3") %>%
group_by(experiment_name,ambiguity_condition) %>%
summarize(
N=n(),
prop_correct=mean(accuracy,na.rm=T),
ci=qt(0.975, N-1)*sd(accuracy,na.rm=T)/sqrt(N),
prop_correct_lower_ci=prop_correct-ci,
prop_correct_upper_ci=prop_correct+ci)
subj_summary_test_3 %>%
select(-ci) %>%
kable()
experiment_name | ambiguity_condition | N | prop_correct | prop_correct_lower_ci | prop_correct_upper_ci |
---|---|---|---|---|---|
Experiment 3 | ambiguous_me | 56 | 0.5758929 | 0.4839003 | 0.6678855 |
subj_summary_item_3 %>%
select(-accuracy_norm,-se,-sd,-ci) %>%
kable()
experiment_name | ambiguity_condition | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci |
---|---|---|---|---|---|---|
Experiment 3 | ambiguous_me | no | 56 | 0.6160714 | 0.5280317 | 0.7041111 |
Experiment 3 | ambiguous_me | yes | 56 | 0.5357143 | 0.4476746 | 0.6237540 |
##logistic mixed-effects model
d$offset.25 <- 1/4
#overall
m <- glmer(isRight~offset(logit(offset.25))+(1|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 3"&trialType=="test"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ offset(logit(offset.25)) + (1 | subject) + (1 | targetImage)
## Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
##
## AIC BIC logLik deviance df.resid
## 289.9 300.2 -142.0 283.9 221
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6324 -0.6345 0.3944 0.6126 1.2762
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 2.17173 1.4737
## targetImage (Intercept) 0.09708 0.3116
## Number of obs: 224, groups: subject, 56; targetImage, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5272 0.3007 5.079 3.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
d$offset.25 <- 1/4
##logistic mixed-effects model
#by item type
m <- glmer(isRight~1+targetIsAmbiguousC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=filter(d,experiment_name=="Experiment 3"&trialType=="test"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ 1 + targetIsAmbiguousC + (1 + targetIsAmbiguousC |
## subject) + (1 | targetImage)
## Data: filter(d, experiment_name == "Experiment 3" & trialType == "test")
##
## AIC BIC logLik deviance df.resid
## 293.5 314.0 -140.8 281.5 218
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7428 -0.6127 0.3710 0.5865 1.3274
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 2.5206 1.5876
## targetIsAmbiguousC 0.7476 0.8646 0.39
## targetImage (Intercept) 0.1001 0.3164
## Number of obs: 224, groups: subject, 56; targetImage, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.4427 0.3168 1.397 0.162
## targetIsAmbiguousC -0.4392 0.3615 -1.215 0.224
##
## Correlation of Fixed Effects:
## (Intr)
## trgtIsAmbgC 0.064
confint(m,method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) -0.1781934 1.0636353
## targetIsAmbiguousC -1.1478162 0.2693513
#summarize test accuracy by choice
subj_test_choice_3 <- d %>%
filter(trialType=="test"&experiment_name %in% c("Experiment 3")) %>%
group_by(subject,experiment_name,ambiguity_condition,chosen) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
subj_summary_choice_3 <- summarySEwithin(
subj_test_choice_3,
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("chosen"),
idvar="subject",
na.rm=T) %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
)
subj_summary_choice_3 %>%
select(-accuracy_norm, -sd,-se,-ci) %>%
kable()
experiment_name | ambiguity_condition | chosen | N | accuracy | lower_ci | upper_ci | chosen_factor |
---|---|---|---|---|---|---|---|
Experiment 3 | ambiguous_me | 0 | 56 | 0.5595238 | 0.4653227 | 0.6537249 | NOT SAMPLED |
Experiment 3 | ambiguous_me | 1 | 56 | 0.5982143 | 0.5040132 | 0.6924154 | SAMPLED |
subj_test_item_choice_3 <- d %>%
filter(trialType=="test"&experiment_name %in% c("Experiment 3")) %>%
group_by(subject,experiment_name,ambiguity_condition,chosen,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
subj_summary_item_choice_3 <- summarySEwithin(
subj_test_item_choice_3,
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("chosen","targetIsAmbiguousYN"),
idvar="subject",
na.rm=T) %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
chosen_factor=ifelse(chosen==0,"NOT SAMPLED","SAMPLED")
)
ggplot(subj_summary_item_choice_3,aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.6,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("mutual exclusivity","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("mutual exclusivity","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type",
limits=c("no","yes"),
labels=c("mutual exclusivity","ambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/4,linetype="dashed",size=1.1)+
theme_classic(base_size=16)+
theme(legend.position="none", axis.text.x=element_text(size=15))+
facet_wrap(~chosen_factor)
m <- glmer(isRight~targetIsAmbiguousC*chosenC+(1+targetIsAmbiguousC|subject)+(1|targetImage),data=subset(d,trialType=="test"&experiment_name=="Experiment 3"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ targetIsAmbiguousC * chosenC + (1 + targetIsAmbiguousC |
## subject) + (1 | targetImage)
## Data: subset(d, trialType == "test" & experiment_name == "Experiment 3")
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 296.5 323.8 -140.3 280.5 216
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9202 -0.6319 0.3743 0.5870 1.5328
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 2.47115 1.5720
## targetIsAmbiguousC 0.52951 0.7277 0.38
## targetImage (Intercept) 0.09831 0.3135
## Number of obs: 224, groups: subject, 56; targetImage, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.4250 0.3188 1.333 0.182
## targetIsAmbiguousC -0.5291 0.3671 -1.441 0.150
## chosenC 0.3414 0.3474 0.983 0.326
## targetIsAmbiguousC:chosenC 0.2390 0.8039 0.297 0.766
##
## Correlation of Fixed Effects:
## (Intr) trgIAC chosnC
## trgtIsAmbgC 0.045
## chosenC 0.001 -0.255
## trgtIsAmC:C -0.164 0.003 0.031
confint(m, method="Wald")[5:8,]
## 2.5 % 97.5 %
## (Intercept) -0.1997402 1.0497618
## targetIsAmbiguousC -1.2487101 0.1904759
## chosenC -0.3395635 1.0223851
## targetIsAmbiguousC:chosenC -1.3365340 1.8145833
##relationship between sampling preference for ambiguous items and test accuracy
cor.test(subset(subj_selection,ambiguity_condition=="ambiguous_me"&experiment_name=="Experiment 3")$accuracy,subset(subj_selection,ambiguity_condition=="ambiguous_me"&experiment_name=="Experiment 3")$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data: subset(subj_selection, ambiguity_condition == "ambiguous_me" & and subset(subj_selection, ambiguity_condition == "ambiguous_me" & experiment_name == "Experiment 3")$accuracy and experiment_name == "Experiment 3")$prop_ambig_selection
## t = 0.49839, df = 54, p-value = 0.6202
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1987701 0.3247892
## sample estimates:
## cor
## 0.06766648
##plot
ggplot(filter(subj_selection,experiment_name=="Experiment 3"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
geom_dotplot(aes(group=prop_ambig_selection,fill=ambiguity_condition),alpha=0.6,binaxis="y",stackdir="center",dotsize=0.8)+
scale_color_manual(limits=c("ambiguous_me"),
values=c("#E41A1C"))+
geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
theme_classic()+
scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
theme_classic(base_size=24)+
ylab("Test Accuracy")+
theme(legend.position="none")+
xlab("Probability of \nAmbiguous Selection")
Analysis of learners’ sampling preferences.
## Plot
d$conditionFull <- ifelse(d$ambiguity_condition=="ambiguous",0,
ifelse(d$ambiguity_condition=="partially ambiguous",-1,NA))
m <- glmer(isAmbiguous~conditionFull+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial, glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
model_pred <- data.frame(conditionFull=c(-1,0),ambiguity_condition=c("partially ambiguous","ambiguous"))
pY <- predictSE(m,model_pred, type="response")
model_pred <- model_pred %>%
mutate(
prop_ambiguous = pY$fit,
prop_ambiguous_lower_ci = pY$fit - 1.96*pY$se.fit,
prop_ambiguous_upper_ci = pY$fit + 1.96*pY$se.fit)
p_expS1_sampling <- ggplot(model_pred,aes(x=ambiguity_condition,y=prop_ambiguous,color=ambiguity_condition,fill=ambiguity_condition))+
geom_bar(stat="identity",size=2.5,fill="white",width=0.5)+
geom_dotplot(data=subset(subj_selection,experiment_name=="Experiment S1"), aes(y=prop_ambig_selection),binaxis="y",stackdir="center",alpha=0.5,dotsize=0.6)+
geom_errorbar(aes(ymin=prop_ambiguous_lower_ci,ymax=prop_ambiguous_upper_ci),width=0,size=1.2)+
ylab("Probability of \nAmbiguous Selection")+
geom_hline(yintercept=0.5,linetype="dotted")+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
scale_color_brewer(palette="Set1")+
scale_fill_brewer(palette="Set1")+
theme_classic(base_size=18)+
theme(legend.position="none")+
scale_x_discrete(name="Condition")
p_expS1_sampling
#descriptives
subj_summary_s1 <- subj_selection %>%
filter(experiment_name=="Experiment S1") %>%
group_by(ambiguity_condition) %>%
summarize(
N=n(),
prop_ambiguous=mean(prop_ambig_selection),
ci_ambiguous=qt(0.975, N-1)*sd(prop_ambig_selection,na.rm=T)/sqrt(N),
prop_ambiguous_lower_ci=prop_ambiguous-ci_ambiguous,
prop_ambiguous_upper_ci=prop_ambiguous+ci_ambiguous,
) %>%
select(-ci_ambiguous)
kable(subj_summary_s1)
ambiguity_condition | N | prop_ambiguous | prop_ambiguous_lower_ci | prop_ambiguous_upper_ci |
---|---|---|---|---|
ambiguous | 28 | 0.6428571 | 0.5359183 | 0.7497960 |
partially ambiguous | 34 | 0.4779412 | 0.3910626 | 0.5648197 |
Estimating the difference in preference for selecting ambiguous items in the Partially Ambiguous condition vs. the Fully Ambiguous condition.
d$conditionC <- ifelse(d$ambiguity_condition=="ambiguous",0.5,
ifelse(d$ambiguity_condition=="partially ambiguous",-0.5,NA))
m <- glmer(isAmbiguous~conditionC+(1|subject)+(1|choiceImage),data=subset(d,(experiment_name=="Experiment S1")&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ conditionC + (1 | subject) + (1 | choiceImage)
## Data: subset(d, (experiment_name == "Experiment S1") & trialType ==
## "selection")
## Control: glmerControl(check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 342 356 -167 334 244
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3825 -0.9601 0.6865 0.9394 1.0968
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 1.151e-01 3.393e-01
## choiceImage (Intercept) 8.752e-10 2.958e-05
## Number of obs: 248, groups: subject, 62; choiceImage, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2569 0.1401 1.833 0.0668 .
## conditionC 0.6955 0.2815 2.471 0.0135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## conditionC 0.145
confint(m,method="Wald")[3:4,]
## 2.5 % 97.5 %
## (Intercept) -0.01777231 0.5315834
## conditionC 0.14384157 1.2472401
Estimating the preference for (fully) ambiguous items by re-centering the model on the Fully Ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Fully Ambiguous condition.
## Fully Ambiguous Condition
d$conditionFull <- ifelse(d$ambiguity_condition=="ambiguous",0,
ifelse(d$ambiguity_condition=="partially ambiguous",-1,NA))
m <- glmer(isAmbiguous~conditionFull+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ conditionFull + (1 | subject) + (1 | choiceImage)
## Data:
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 342 356 -167 334 244
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3825 -0.9601 0.6865 0.9394 1.0968
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.1151 0.3393
## choiceImage (Intercept) 0.0000 0.0000
## Number of obs: 248, groups: subject, 62; choiceImage, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6047 0.2125 2.845 0.00444 **
## conditionFull 0.6955 0.2815 2.471 0.01347 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## conditinFll 0.758
confint(m,method="Wald")[3:4,]
## 2.5 % 97.5 %
## (Intercept) 0.1881258 1.021231
## conditionFull 0.1438395 1.247238
Estimating the preference for (partially) ambiguous items by re-centering the model on the Partially Ambiguous condition. The intercept represents the preference for selecting ambiguous items in the Partially Ambiguous condition.
d$conditionPartial <- ifelse(d$ambiguity_condition=="ambiguous",1,
ifelse(d$ambiguity_condition=="partially ambiguous",0,NA))
m <- glmer(isAmbiguous~conditionPartial+(1|subject)+(1|choiceImage),data=subset(d,experiment_name=="Experiment S1"&trialType=="selection"),family=binomial,glmerControl(check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isAmbiguous ~ conditionPartial + (1 | subject) + (1 | choiceImage)
## Data:
## subset(d, experiment_name == "Experiment S1" & trialType == "selection")
## Control: glmerControl(check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 342 356 -167 334 244
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3825 -0.9601 0.6865 0.9394 1.0968
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.1151 0.3393
## choiceImage (Intercept) 0.0000 0.0000
## Number of obs: 248, groups: subject, 62; choiceImage, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.09086 0.18365 -0.495 0.6208
## conditionPartial 0.69553 0.28148 2.471 0.0135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## conditnPrtl -0.656
confint(m,method="Wald")[3:4,]
## 2.5 % 97.5 %
## (Intercept) -0.4508106 0.2690974
## conditionPartial 0.1438339 1.2472347
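The two re-centered intercepts can likewise be back-transformed to condition-level probabilities of selecting an ambiguous item (a minimal sketch based on the estimates printed above):
#condition-specific probabilities implied by the re-centered intercepts
plogis(0.6047)   # Fully Ambiguous condition: ~0.65
plogis(-0.09086) # Partially Ambiguous condition: ~0.48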
##condition comparison
wilcox.test(subset(subj_selection , ambiguity_condition=="ambiguous"&experiment_name=="Experiment S1")$prop_ambig_selection,
subset(subj_selection, ambiguity_condition=="partially ambiguous"&experiment_name=="Experiment S1")$prop_ambig_selection,
conf.int=T, conf.level=0.95)
##
## Wilcoxon rank sum test with continuity correction
##
## data:  subset(subj_selection, ambiguity_condition == "ambiguous" & experiment_name == "Experiment S1")$prop_ambig_selection and subset(subj_selection, ambiguity_condition == "partially ambiguous" & experiment_name == "Experiment S1")$prop_ambig_selection
## W = 631.5, p-value = 0.02256
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 2.599459e-05 2.500091e-01
## sample estimates:
## difference in location
## 0.2499557
##Fully Ambiguous Condition
wilcox.test(subset(subj_selection, experiment_name=="Experiment S1"&ambiguity_condition=="ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition == "ambiguous")$prop_ambig_selection
## V = 140, p-value = 0.01534
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
## 0.5000486 0.8750296
## sample estimates:
## (pseudo)median
## 0.750061
##Partially Ambiguous Condition
wilcox.test(subset(subj_selection, experiment_name=="Experiment S1"&ambiguity_condition=="partially ambiguous")$prop_ambig_selection,mu=1/2, conf.int=T, conf.level=0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(subj_selection, experiment_name == "Experiment S1" & ambiguity_condition == "partially ambiguous")$prop_ambig_selection
## V = 100.5, p-value = 0.5901
## alternative hypothesis: true location is not equal to 0.5
## 95 percent confidence interval:
## 0.2500428 0.6249926
## sample estimates:
## (pseudo)median
## 0.4999225
Analysis of learners’ test performance.
##summarize participant accuracy by test half
subj_test_half <- d %>%
filter(trialType=="test"&experiment_name=="Experiment S1") %>%
group_by(subject,experiment_name,ambiguity_condition, testHalf) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
##by item and test half
subj_test_half_item <- d %>%
filter(trialType=="test"&experiment_name=="Experiment S1") %>%
group_by(subject,experiment_name,ambiguity_condition, testHalf,targetType,targetIsAmbiguous,targetIsAmbiguousYN) %>%
summarize(
N=n(),
accuracy=mean(isRight,na.rm=T))
#item summary within-subjects corrected CIs
subj_summary_item_testhalf_correctedCIs_s1 <- summarySEwithin(
filter(
subj_test_half_item,
(experiment_name=="Experiment S1")),
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("testHalf","targetIsAmbiguousYN"),
idvar="subject") %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
testHalf_name = paste("Test Block",testHalf,sep=" ")
)
#Experiment S1 - test
p_expS1_fulltest <- ggplot(filter(subj_summary_item_testhalf_correctedCIs_s1,ambiguity_condition=="ambiguous"),aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_point(data=filter(subj_test_half_item,experiment_name=="Experiment S1"&ambiguity_condition=="ambiguous"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","ambiguous"),
values=c("#4DAF4A","#E41A1C"))+
scale_x_discrete(name = "Item Type\n\nFully Ambiguous Condition",
limits=c("no","yes"),
labels=c("disambig-\nuated","fully\nambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
theme_classic(base_size=12)+
theme(legend.position="none", axis.text.x=element_text(size=9))+
facet_wrap(~testHalf_name)
p_expS1_partialtest <- ggplot(filter(subj_summary_item_testhalf_correctedCIs_s1, ambiguity_condition=="partially ambiguous"),aes(x=targetIsAmbiguousYN,y=accuracy,color=targetIsAmbiguousYN,fill=targetIsAmbiguousYN))+
geom_bar(stat="identity",size=1.5,position=position_dodge(.53),width=0.5,alpha=0)+
geom_point(data=filter(subj_test_half_item,experiment_name=="Experiment S1"&ambiguity_condition=="partially ambiguous"),aes(fill=targetIsAmbiguousYN),alpha=0.5,position=position_jitterdodge(dodge.width=0.53,jitter.width=0.1,jitter.height=0.02))+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0.0,size=0.8,position=position_dodge(.53))+
scale_color_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","partially\nambiguous"),
values=c("#4DAF4A","#377EB8"))+
scale_fill_manual(name="Item Type",
limits=c("no","yes"),
labels=c("disambiguated","partially\nambiguous"),
values=c("#4DAF4A","#377EB8"))+
scale_x_discrete(name = "Item Type\n\nPartially Ambiguous Condition",
limits=c("no","yes"),
labels=c("disambig-\nuated","partially\nambiguous"))+
ylab("Test Accuracy")+
geom_hline(yintercept=1/8,linetype="dashed",size=1.1)+
theme_classic(base_size=12)+
theme(legend.position="none", axis.text.x=element_text(size=9))+
facet_wrap(~testHalf_name)
plot_grid(p_expS1_fulltest,p_expS1_partialtest, labels=c("A","B"),rel_widths=c(1,1),label_size=24,nrow=1)
Split by test half.
## descriptives
subj_summary_test_s1 <- summarySEwithin(
subj_test_half,
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("testHalf"),
idvar="subject") %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
testHalf_name = paste("Test Block",testHalf,sep=" ")
) %>%
select(-accuracy_norm,-se,-sd,-ci)
kable(subj_summary_test_s1)
experiment_name | ambiguity_condition | testHalf | N | accuracy | lower_ci | upper_ci | testHalf_name |
---|---|---|---|---|---|---|---|
Experiment S1 | ambiguous | 1 | 28 | 0.6517857 | 0.6118389 | 0.6917325 | Test Block 1 |
Experiment S1 | ambiguous | 2 | 28 | 0.7276786 | 0.6877318 | 0.7676254 | Test Block 2 |
Experiment S1 | partially ambiguous | 1 | 34 | 0.7683824 | 0.7248067 | 0.8119580 | Test Block 1 |
Experiment S1 | partially ambiguous | 2 | 34 | 0.7757353 | 0.7321596 | 0.8193110 | Test Block 2 |
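The cell means in this table can be cross-checked directly from subj_test_half (a minimal sketch; the means should match, while the intervals above use the within-subject correction):
#raw condition-by-block means, without the within-subject CI correction
subj_test_half %>%
group_by(ambiguity_condition,testHalf) %>%
summarize(mean_accuracy=mean(accuracy,na.rm=T))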
Split by test half and item type (ambiguous vs. disambiguated).
## split by item
subj_summary_test_item_s1 <- summarySEwithin(
subj_test_half_item,
"accuracy",
betweenvars=c("experiment_name","ambiguity_condition"),
withinvars=c("testHalf","targetIsAmbiguousYN"),
idvar="subject") %>%
mutate(
lower_ci = accuracy - ci,
upper_ci = accuracy + ci,
testHalf_name = paste("Test Block",testHalf,sep=" ")
) %>%
select(-accuracy_norm,-se,-sd,-ci)
kable(subj_summary_test_item_s1)
experiment_name | ambiguity_condition | testHalf | targetIsAmbiguousYN | N | accuracy | lower_ci | upper_ci | testHalf_name |
---|---|---|---|---|---|---|---|---|
Experiment S1 | ambiguous | 1 | no | 28 | 0.8392857 | 0.7786705 | 0.8999009 | Test Block 1 |
Experiment S1 | ambiguous | 1 | yes | 28 | 0.4642857 | 0.3776673 | 0.5509041 | Test Block 1 |
Experiment S1 | ambiguous | 2 | no | 28 | 0.8571429 | 0.7783393 | 0.9359464 | Test Block 2 |
Experiment S1 | ambiguous | 2 | yes | 28 | 0.5982143 | 0.5156617 | 0.6807669 | Test Block 2 |
Experiment S1 | partially ambiguous | 1 | no | 34 | 0.7500000 | 0.6896160 | 0.8103840 | Test Block 1 |
Experiment S1 | partially ambiguous | 1 | yes | 34 | 0.7867647 | 0.7272860 | 0.8462434 | Test Block 1 |
Experiment S1 | partially ambiguous | 2 | no | 34 | 0.7941176 | 0.7390598 | 0.8491754 | Test Block 2 |
Experiment S1 | partially ambiguous | 2 | yes | 34 | 0.7573529 | 0.6839887 | 0.8307171 | Test Block 2 |
d$testHalfC <- ifelse(!is.na(d$testHalf) & d$testHalf==2,0.5,
ifelse(!is.na(d$testHalf) & d$testHalf==1,-0.5,NA))
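#quick sanity check (sketch): the centered coding should map test half 1 to -0.5 and 2 to 0.5
table(d$testHalf,d$testHalfC,useNA="ifany")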
#three-way interaction
m <- glmer(isRight~targetIsAmbiguousC*conditionC*testHalfC+(1+targetIsAmbiguousC*testHalfC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment S1")&trialType=="test"&ambiguity_condition!="non ambiguous"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ targetIsAmbiguousC * conditionC * testHalfC + (1 +
## targetIsAmbiguousC * testHalfC | subject) + (1 | targetImage)
## Data: filter(d, (experiment_name == "Experiment S1") & trialType ==
## "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 846.0 939.1 -404.0 808.0 973
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0148 -0.2249 0.1640 0.3500 4.4299
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 5.4970 2.3446
## targetIsAmbiguousC 1.0299 1.0148 0.02
## testHalfC 0.7031 0.8385 0.99 0.19
## targetIsAmbiguousC:testHalfC 0.5055 0.7110 0.35 0.94 0.50
## targetImage (Intercept) 0.1513 0.3889
## Number of obs: 992, groups: subject, 62; targetImage, 8
##
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                               2.0641     0.3900   5.292 1.21e-07 ***
## targetIsAmbiguousC                       -1.3301     0.3950  -3.368 0.000758 ***
## conditionC                               -0.7664     0.6784  -1.130 0.258643    
## testHalfC                                 0.8217     0.3614   2.273 0.023007 *  
## targetIsAmbiguousC:conditionC            -2.7515     0.5694  -4.832 1.35e-06 ***
## targetIsAmbiguousC:testHalfC             -0.1361     0.6891  -0.197 0.843444    
## conditionC:testHalfC                      0.1909     0.5104   0.374 0.708427    
## targetIsAmbiguousC:conditionC:testHalfC   0.7522     0.9304   0.808 0.418824    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgIAC cndtnC tstHlC trIAC:C tIAC:H cnC:HC
## trgtIsAmbgC -0.174
## conditionC 0.028 -0.129
## testHalfC 0.447 -0.127 0.000
## trgtIsAmC:C -0.108 -0.019 -0.003 -0.067
## trgtIsAC:HC -0.031 0.418 -0.020 -0.306 -0.038
## cndtnC:tsHC -0.014 -0.097 0.476 -0.077 0.034 -0.236
## trgIAC:C:HC -0.039 -0.045 0.059 -0.247 0.260 -0.093 0.086
confint(m, method="Wald")[12:19,]
## 2.5 % 97.5 %
## (Intercept) 1.2996085 2.8285636
## targetIsAmbiguousC -2.1042260 -0.5560018
## conditionC -2.0960427 0.5633383
## testHalfC 0.1132606 1.5301136
## targetIsAmbiguousC:conditionC -3.8675112 -1.6355179
## targetIsAmbiguousC:testHalfC -1.4867027 1.2145210
## conditionC:testHalfC -0.8095147 1.1912735
## targetIsAmbiguousC:conditionC:testHalfC -1.0713783 2.5757980
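On the odds scale, the key two-way interaction and its Wald CI from above can be read as odds ratios (a minimal sketch using the printed estimates):
#odds ratio for the targetIsAmbiguousC:conditionC interaction and its 95% Wald CI
exp(-2.7515)                   # ~0.06
exp(c(-3.8675112, -1.6355179))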
#test the test-half increase specifically for ambiguous items in the Fully Ambiguous condition
d$targetIsAmbiguous_ambiguous <- ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==1,0,
ifelse(!is.na(d$targetIsAmbiguous) & d$targetIsAmbiguous==0,-1,NA))
m <- glmer(isRight~targetIsAmbiguous_ambiguous*conditionFull*testHalfC+(1+targetIsAmbiguous_ambiguous*testHalfC|subject)+(1|targetImage),data=filter(d,(experiment_name=="Experiment S1")&trialType=="test"&ambiguity_condition!="non ambiguous"),family=binomial,glmerControl(optimizer="bobyqa",check.conv.singular="ignore"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: isRight ~ targetIsAmbiguous_ambiguous * conditionFull * testHalfC +
## (1 + targetIsAmbiguous_ambiguous * testHalfC | subject) +
## (1 | targetImage)
## Data: filter(d, (experiment_name == "Experiment S1") & trialType ==
## "test" & ambiguity_condition != "non ambiguous")
## Control: glmerControl(optimizer = "bobyqa", check.conv.singular = "ignore")
##
## AIC BIC logLik deviance df.resid
## 846.0 939.1 -404.0 808.0 973
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0148 -0.2249 0.1640 0.3500 4.4299
##
## Random effects:
##  Groups      Name                                  Variance Std.Dev. Corr          
##  subject     (Intercept)                           5.8087   2.4101                 
##              targetIsAmbiguous_ambiguous           1.0299   1.0148   0.23          
##              testHalfC                             1.1294   1.0627   0.97 0.46     
##              targetIsAmbiguous_ambiguous:testHalfC 0.5055   0.7110   0.54 0.94 0.73
##  targetImage (Intercept)                           0.1513   0.3889                 
## Number of obs: 992, groups:  subject, 62; targetImage, 8
##
## Fixed effects:
##                                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                           0.3280     0.5258   0.624  0.53276    
## targetIsAmbiguous_ambiguous                          -2.7059     0.4825  -5.608 2.05e-08 ***
## conditionFull                                        -2.1421     0.7349  -2.915  0.00356 ** 
## testHalfC                                             1.0371     0.4364   2.377  0.01747 *  
## targetIsAmbiguous_ambiguous:conditionFull            -2.7515     0.5694  -4.833 1.35e-06 ***
## targetIsAmbiguous_ambiguous:testHalfC                 0.2400     0.7947   0.302  0.76263    
## conditionFull:testHalfC                               0.5670     0.7195   0.788  0.43070    
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC   0.7522     0.9303   0.809  0.41877    
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgIA_ cndtnF tstHlC trIA_:F tIA_:H cnF:HC
## trgtIsAmbg_ 0.236
## conditinFll 0.640 0.123
## testHalfC 0.498 0.161 0.300
## trgtIsAm_:F 0.181 0.575 0.384 0.073
## trgtIsA_:HC 0.133 0.346 0.062 0.481 0.119
## cndtnFll:HC 0.232 0.033 0.422 0.467 0.192 0.217
## trgIA_:F:HC 0.062 0.117 0.155 0.305 0.260 0.505 0.707
confint(m, method="Wald")[12:19,]
## 2.5 % 97.5 %
## (Intercept) -0.7025257 1.3584754
## targetIsAmbiguous_ambiguous -3.6515408 -1.7602106
## conditionFull -3.5824604 -0.7017594
## testHalfC 0.1818284 1.8924395
## targetIsAmbiguous_ambiguous:conditionFull -3.8674642 -1.6355665
## targetIsAmbiguous_ambiguous:testHalfC -1.3175109 1.7975297
## conditionFull:testHalfC -0.8432513 1.9772172
## targetIsAmbiguous_ambiguous:conditionFull:testHalfC -1.0711732 2.5755888
Fully Ambiguous Condition - Test Block 1
#correlations in Experiment S1: proportion of ambiguous items selected vs. test accuracy
subj_test_half <- subj_test_half %>%
left_join(select(subj_selection,subject,prop_ambig_selection)) %>%
mutate(
testHalf_name = paste("Test Block",testHalf,sep=" "))
#Fully Ambiguous - test block 1
cor.test(subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==1)$accuracy,subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==1)$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 1)$accuracy and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 1)$prop_ambig_selection
## t = 2.5909, df = 26, p-value = 0.01549
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09617591 0.70664954
## sample estimates:
## cor
## 0.4529988
Fully Ambiguous Condition - Test Block 2
#Fully Ambiguous - test block 2
cor.test(subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==2)$accuracy,subset(subj_test_half,ambiguity_condition=="ambiguous"&testHalf==2)$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data:  subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 2)$accuracy and subset(subj_test_half, ambiguity_condition == "ambiguous" & testHalf == 2)$prop_ambig_selection
## t = 1.8848, df = 26, p-value = 0.07069
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03028924 0.63734347
## sample estimates:
## cor
## 0.3467055
Partially Ambiguous Condition - Test Block 1
#Partially Ambiguous - test block 1
cor.test(subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==1)$accuracy,subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==1)$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 1)$accuracy and subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 1)$prop_ambig_selection
## t = 0.55366, df = 32, p-value = 0.5837
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2489571 0.4216846
## sample estimates:
## cor
## 0.09740938
Partially Ambiguous Condition - Test Block 2
#Partially Ambiguous - test block 2
cor.test(subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==2)$accuracy,subset(subj_test_half,ambiguity_condition=="partially ambiguous"&testHalf==2)$prop_ambig_selection)
##
## Pearson's product-moment correlation
##
## data:  subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 2)$accuracy and subset(subj_test_half, ambiguity_condition == "partially ambiguous" & testHalf == 2)$prop_ambig_selection
## t = -0.67859, df = 32, p-value = 0.5023
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4395666 0.2282537
## sample estimates:
## cor
## -0.1191055
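The same four sampling-test correlations can be computed in one pass (a compact dplyr sketch; the default Pearson, two-sided cor.test reproduces the r and p values reported above):
#all condition-by-block correlations between ambiguous-item sampling and test accuracy
subj_test_half %>%
group_by(ambiguity_condition,testHalf) %>%
summarize(
r=cor(accuracy,prop_ambig_selection),
p=cor.test(accuracy,prop_ambig_selection)$p.value)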
#Experiment S1 - sampling-test
pS1_fullsampling_test <- ggplot(filter(subj_test_half,ambiguity_condition=="ambiguous"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
scale_color_manual(limits=c("ambiguous"),
labels=c("fully ambiguous"),
values=c("#E41A1C"))+
#geom_point(position=position_jitter(width=.05,height=.0))+
geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
theme_classic()+
#ylim(0,1.08)+
scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
theme_classic(base_size=12)+
ylab("Test Accuracy")+
theme(legend.position="none")+
xlab("Proportion of Ambiguous Selections\n\nFully Ambiguous Condition")+
facet_wrap(~testHalf_name)
pS1_partialsampling_test <- ggplot(filter(subj_test_half,ambiguity_condition=="partially ambiguous"),aes(prop_ambig_selection,accuracy, color=ambiguity_condition))+
geom_violin(aes(group=prop_ambig_selection),draw_quantiles=c(0.5))+
scale_color_manual(limits=c("partially ambiguous"),
labels=c("partially ambiguous"),
values=c("#377EB8"))+
#geom_point(position=position_jitter(width=.05,height=.0))+
geom_smooth(method="lm",color="black",fill="#4B0082",alpha=0.3)+
theme_classic()+
#ylim(0,1.08)+
scale_x_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1))+
scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1),limits=c(0,1.08))+
theme_classic(base_size=12)+
ylab("Test Accuracy")+
theme(legend.position="none")+
xlab("Proportion of Ambiguous Selections\n\nPartially Ambiguous Condition")+
facet_wrap(~testHalf_name)
plot_grid(pS1_fullsampling_test,pS1_partialsampling_test,labels=c("A","B"),rel_widths=c(1,1),label_size=24,nrow=1)