Copy and paste the code below into R. The code uses the mvtnorm package, which you can install using:
> install.packages("mvtnorm")
To calculate summability for a dichotomously (0/1) scored data_frame with subjects as rows, and items as columns, use:
> summability (data_frame, dichotomous = TRUE)
Set dichotomous = FALSE if you have continuous scores
If you want to get calculations for "summability if item deleted":
> summability (data_frame, iid = TRUE, dichotomous = TRUE)
Additional option (added November 8th, 2018, updated January 16th, 2019):
Set na.rm = TRUE if your data contain missing values:
> summability (data_frame, na.rm=TRUE)
# summability.R
# Jelle Goeman & Nivja de Jong
# version December 6th, 2020
library(mvtnorm)
# calculates tetrachoric correlation from a 2x2 table
tet <- function(M, shrink=0.5) {
# shrink toward independence as a continuity correction
n <- sum(M)
p <- sum(M[1,])/n
q <- sum(M[,1])/n
M <- M + shrink*matrix(c(p*q, (1-p)*q, p*(1-q), (1-p)*(1-q)),2,2)
n <- sum(M)
# calculate tetrachoric correlation
if (any(is.na(M) | M==0)) { # solution at the edge of the parameter space
return(ifelse(M[1,1]*M[2,2]==0, -1, 1))
} else { # solution in the interior
mu <- c(qnorm(sum(M[,1])/n), qnorm(sum(M[1,])/n))
target <- M[1,2]/n
prob <- function(rho) {
Sig <- matrix(c(1,rho,rho,1),2,2)
pmvnorm(c(-Inf,0), c(0,Inf), mu, Sig) - target
}
return(uniroot(prob, c(-1,1))$root)
}
}
# summability
summability <- function(IS, iid = FALSE, adjusted = TRUE, na.rm = FALSE, dichotomous) {
IS <- as.matrix(IS)
# remove redundant questions (only for computation speed; does not affect result)
IS <- IS[,colSums(!is.na(IS)) > 1]
IS <- IS[,!(apply(IS, 2, sd, na.rm=TRUE) == 0)]
if (missing(dichotomous)) {
dichotomous <- all(IS %in% 0:1 | is.na(IS))
}
#number of items
k <- ncol(IS)
# calculate correlation matrix
if (dichotomous) {
# table
if (na.rm) {
a <- crossprod(replace(IS, is.na(IS), 0))
b <- crossprod(replace(IS, is.na(IS), 0), 1-replace(IS, is.na(IS), 1))
c <- crossprod(1-replace(IS, is.na(IS), 1), replace(IS, is.na(IS), 0))
d <- crossprod(1-replace(IS, is.na(IS), 1))
} else {
a <- crossprod(IS)
b <- crossprod(IS, 1-IS)
c <- crossprod(1-IS, IS)
d <- crossprod(1-IS)
}
cor <- diag(k)
for (i in 1:(k-1)) for(j in (i+1):k) {
M <- matrix(c(a[i,j], b[i,j], c[i,j], d[i,j]), 2, 2)
cor[i,j] <- cor[j,i] <- tet(M)
}
} else
if (na.rm) {
cor <- cor(IS, use= "pairwise.complete.obs")
} else {
cor <- cor(IS)
}
# standard deviations
sd <- apply(IS, 2, sd, na.rm=TRUE)
#covariance matrix
cov <- cor * outer(sd,sd)
# summability adjusted or unadjusted
if (adjusted) {
summa <- (sum(cov) - sum(diag(cov)))/(sum(sd)^2 - sum(diag(cov)))
if (iid) iids <- sapply(1:k, function(i) {
(sum(cov[-i,-i]) - sum(diag(cov[-i,-i])))/(sum(sd[-i])^2 - sum(diag(cov[-i,-i])))
})
} else {
summa <- sum(cov)/(sum(sd)^2)
if (iid) iids <- sapply(1:k, function(i) {
sum(cov[-i,-i])/(sum(sd[-i])^2)
})
}
if (iid){
names(iids) <- colnames(IS)
list(summability=summa, iid = iids)
} else
summa
}