R-code

Copy and paste the code below into R. The code uses the mvtnorm package, which you can install using:

> install.packages("mvtnorm")

To calculate summability for a dichotomously (0/1) scored data_frame with subjects as rows, and items as columns, use:

> summability (data_frame, dichotomous = TRUE)

Set dichotomous = FALSE if you have continuous scores

If you want to get calculations for "summability if item deleted":

> summability (data_frame, iid = TRUE, dichotomous = TRUE)

Additional option (added November 8th, 2018, updated January 16th, 2019):

Set na.rm = TRUE if your data contain missing values:

> summability (data_frame, na.rm=TRUE)

# summability.R

# Jelle Goeman & Nivja de Jong

# version December 6th, 2020

library(mvtnorm)

# calculates tetrachoric correlation from a 2x2 table

tet <- function(M, shrink=0.5) {

  # shrink toward independence as a continuity correction

  n <- sum(M)

  p <- sum(M[1,])/n

  q <- sum(M[,1])/n

  M <- M + shrink*matrix(c(p*q, (1-p)*q, p*(1-q), (1-p)*(1-q)),2,2)

  n <- sum(M)

  # calculate tetrachoric correlation

  if (any(is.na(M) | M==0)) { # solution at the edge of the parameter space

    return(ifelse(M[1,1]*M[2,2]==0, -1, 1))

  } else {         # solution in the interior

    mu <- c(qnorm(sum(M[,1])/n), qnorm(sum(M[1,])/n))

    target <- M[1,2]/n

    prob <- function(rho) {

      Sig <- matrix(c(1,rho,rho,1),2,2)

      pmvnorm(c(-Inf,0), c(0,Inf), mu, Sig) - target

    return(uniroot(prob, c(-1,1))$root)

# summability

summability <- function(IS, iid = FALSE, adjusted = TRUE, na.rm = FALSE, dichotomous) {

  IS <- as.matrix(IS)

  # remove redundant questions (only for computation speed; does not affect result)

  IS <- IS[,colSums(!is.na(IS)) > 1]

  IS <- IS[,!(apply(IS, 2, sd, na.rm=TRUE) == 0)]

  if (missing(dichotomous)) {

    dichotomous <- all(IS %in% 0:1 | is.na(IS))

  #number of items

  k <- ncol(IS)

  # calculate correlation matrix

  if (dichotomous) {

    # table

    if (na.rm) {

      a <- crossprod(replace(IS, is.na(IS), 0))

      b <- crossprod(replace(IS, is.na(IS), 0), 1-replace(IS, is.na(IS), 1))

      c <- crossprod(1-replace(IS, is.na(IS), 1), replace(IS, is.na(IS), 0))

      d <- crossprod(1-replace(IS, is.na(IS), 1))

    } else {

      a <- crossprod(IS)

      b <- crossprod(IS, 1-IS)

      c <- crossprod(1-IS, IS)

      d <- crossprod(1-IS)

    cor <- diag(k)

    for (i in 1:(k-1)) for(j in (i+1):k) {

      M <- matrix(c(a[i,j], b[i,j], c[i,j], d[i,j]), 2, 2)

      cor[i,j] <- cor[j,i] <- tet(M)

  } else

    if (na.rm) {

      cor <- cor(IS, use= "pairwise.complete.obs")

    } else {

      cor <- cor(IS)

  # standard deviations

  sd <- apply(IS, 2, sd, na.rm=TRUE)

  #covariance matrix

  cov <- cor * outer(sd,sd)

  # summability adjusted or unadjusted

  if (adjusted) {

    summa <- (sum(cov) - sum(diag(cov)))/(sum(sd)^2 - sum(diag(cov)))

    if (iid) iids <- sapply(1:k, function(i) {

      (sum(cov[-i,-i]) - sum(diag(cov[-i,-i])))/(sum(sd[-i])^2 - sum(diag(cov[-i,-i])))

})

  } else {

    summa <- sum(cov)/(sum(sd)^2)

    if (iid) iids <- sapply(1:k, function(i) {

      sum(cov[-i,-i])/(sum(sd[-i])^2)

})

  if (iid){

    names(iids) <- colnames(IS)

    list(summability=summa, iid = iids)

  } else

    summa

Google Sites

Report abuse