Example Programs‎ > ‎

Data Import & Export

Below is a comparison of the commands used to read data into in R, SAS, SPSS and Stata. The variables gender and workshop are categorical factors and q1 to q4, pretest and posttest are considered continuous and normally distributed. The practice data set is shown here. The programs and the data they use are also available for download here. Examples that are missing in the Stata column reflect differences between the two books. We will get around to those when we write the second edition of R for Stata Users.

Reading a comma-delimited text file

 R SAS SPSS Stata
# With id variable not named.

mydata <- read.csv("mydata.csv")

# With id named in the header.
mydata <- read.csv("mydataID.csv",
  row.names = "id")
PROC IMPORT OUT=myLib.mydata
  DATAFILE="mydataID.csv"
      DBMS=CSV REPLACE;
  GETNAMES=YES;
   DATAROW=2;
RUN;
PROC PRINT; RUN;
GET DATA  /TYPE=TXT
 /FILE='mydataID.csv'
 /DELCASE=LINE
 /DELIMITERS=","
 /ARRANGEMENT=DELIMITED
 /FIRSTCASE=2
 /IMPORTCASE=ALL
 /VARIABLES=id F1.0 
   workshop F1.0
   gender A1.0
   q1 F1.0  q2 F1.0 
   q3 F1.0  q4 F1.0 .
LIST.
SAVE OUTFILE='mydata.sav'.
clear
insheet using
  "mydata.csv",
  comma names
list



Reading a tab-delimited text file

 R SAS SPSS Stata
# With id variable not named.
mydata <-read.delim("mydata.tab")
mydata

# With ID named in the header.
mydata <-
  read.delim("mydataID.tab",
  row.names = "id")
mydata
PROC IMPORT
  OUT=myLib.mydata
  DATAFILE="mydataID.tab"
      DBMS=TAB REPLACE;
  GETNAMES=YES;
   DATAROW=2;
RUN;
PROC PRINT; RUN;
GET DATA
  /TYPE=TXT
  /FILE="mydataID.tab"
  /DELCASE=LINE
  /DELIMITERS="\t"
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /IMPORTCASE=ALL
 /VARIABLES = id F1.0  workshop F1.0 gender A1.0
 q1 F1.0  q2 F1.0  q3 F1.0  q4 F1.0 .
LIST.
EXECUTE.
DATASET NAME DataSet1 WINDOW=FRONT.
clear
insheet using 
  "mydataID.tab",
  tab names
list



Reading a comma-delimited text file from a web site

 R SAS SPSS Stata
myURL <- "http://sites.google.com/site/r4statistics/mydata.csv"
mydata <- read.csv(myURL)
mydata
FILENAME myURL URL
  "http://sites.google.com/site/r4statistics/mydataID.csv";
PROC IMPORT DATAFILE= myURL
            DBMS=CSV REPLACE
            OUT= myLib.mydata;
     GETNAMES=YES;
     DATAROW=2;
RUN;
PROC PRINT; RUN;
SPSS cannot read text files from a URL directly. It can read SPSS, SAS, Excel, and Stata files from a URL through the SPSSINC GETURI DATA extension.




Reading data within a program

 R SAS SPSS Stata
# This works interactively
# See book for alternative
# approach for sourced files.

mydata <- read.csv( stdin() )
workshop,gender,q1,q2,q3,q4
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2,NA,3,1,NA,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5

# Blank line above ends input.
LIBNAME myLib 'C:\myRfolder';
DATA myLib.mydata;
INFILE DATALINES DELIMITER = ','
   MISSOVER DSD firstobs=2 ;
INPUT id workshop gender $ q1 q2 q3 q4;
DATALINES;
id,workshop,gender,q1,q2,q3,q4
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2, ,3,1, ,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5
PROC PRINT; RUN;
DATA LIST / id 2 workshop 4
  gender 6 (A)
  q1 8  q2 10  q3 12  q4 14.
BEGIN DATA.
1,1,f,1,1,5,1
2,2,f,2,1,4,1
3,1,f,2,2,4,3
4,2, ,3,1, ,3
5,1,m,4,5,2,4
6,2,m,5,4,5,5
7,1,m,5,3,4,4
8,2,m,4,5,5,5
END DATA.
LIST.
SAVE OUTFILE='C:\myRfolder\mydata.sav'.
clear all
input id workshop str1 gender q1-q4, automatic
1 1 f 1 1 5 1
2 2 f 2 1 4 1
3 1 f 2 2 4 3
4 2 . 3 1 . 3
5 1 m 4 5 2 4
6 2 m 5 4 5 5
7 1 m 5 3 4 4
8 2 m 4 5 5 5
end
list




Reading Multiple Observations per Line

 R SAS SPSS Stata
# This works interactively
# See book for alternative
# approach for sourced files.

mylist <- scan( stdin(),
  what = list(
    id = 0,
    workshop = 0,
    gender   = "",
    q1 = 0, q2 = 0,
    q3 = 0, q4 = 0))
1 1 f 1 1 5 1    2 2 f 2 1 4 1
3 1 f 2 2 4 3    4 2 NA 3 1 NA 3
5 1 m 4 5 2 4    6 2 m 5 4 5 5
7 1 m 5 3 4 4    8 2 m 4 5 5 5

# Blank line above ends input.
mylist

mydata <- data.frame(mylist)
mydata
DATA mydata;
INPUT id workshop gender $ q1-q4 @@;
DATALINES;
1 1 f 1 1 5 1    2 2 f 2 1 4 1
3 1 f 2 2 4 3    4 2 . 3 1 . 3
5 1 m 4 5 2 4    6 2 m 5 4 5 5
7 1 m 5 3 4 4    8 2 m 4 5 5 5
;
PROC PRINT; RUN;
DATA LIST FREE/ id (f1.0)  workshop (f1.0) gender (A)
q1 (f1.0)  q2 (f1.0)  q3 (f1.0) q4 (f1.0).

BEGIN DATA.
1,1,f,1,1,5,1,    2,2,f,2,1,4,1
3,1,f,2,2,4,3,    4,2, ,3,1, ,3
5,1,m,4,5,2,4,    6,2,m,5,4,5,5
7,1,m,5,3,4,4,    8,2,m,4,5,5,5
END DATA.

LIST.




Reading Fixed-Width Text Files, One Record per Case

 R SAS SPSS Stata
myfile <- "mydataFWF.txt"
myVarNames  <-
  c("id", "gender", "q1",
    "q2", "q3", "q4")
myVarWidths <-
  c(2, -1, 1, 1, 1, 1, 1)

mydata <- read.fwf(
   file        = myfile,
   width       = myVarWidths,
   col.names   = myVarNames,
   row.names   = "id",
   na.strings  = "",
   fill        = TRUE,
   strip.white = TRUE)
mydata
DATA myLib.mydata;
INFILE 'mydataFWF.txt'
  MISSOVER;
INPUT id 1-2 workshop 3
  gender $ 4
  q1 5  q2 6  q3 7  q4 8;
RUN;
DATA LIST FILE='mydataFWF.txt'
  RECORDS=1
  /1 id 1-2 workshop 3
  gender 4 (A)  q1 5  q2 6 
  q3 7  q4 8.
LIST.
infix id 1-2 workshop 3
  gender 4 q1 5 q2 6 q3 7
  q4 8 ///
using mydataFWF.txt, clear
list



Reading Fixed-Width Text Files, Two Records per Case

 R SAS SPSS Stata
myfile <- "mydataFWF.txt"
myVarNames  <- c("id", "workshop", "gender",
  "q1", "q2", "q3", "q4",
  "q5", "q6", "q7", "q8")
myRecord1Widths  <-
  c( 2, 1, 1, 1, 1, 1, 1)
myRecord2Widths  <-
  c(-2,-1,-1, 1, 1, 1, 1)
myVarWidths <- list(myRecord1Widths,
     myRecord2Widths)

mydata <- read.fwf(
   file        = myfile,
   width       = myVarWidths,
   col.names   = myVarNames,
   row.names   = "id",
   na.strings  = "",
   fill        = TRUE,
   strip.white = TRUE )
mydata
DATA temp;
INFILE 'mydataFWF.txt'
  MISSOVER;
INPUT
  #1 id 1-2 workshop 3
     gender 4  q1 5  q2 6 
     q3 7  q4 8
  #2 q5 5  q6 6 
     q7 7  q8 8;
PROC PRINT;
RUN;
DATA LIST FILE='mydataFWF.txt'
  RECORDS=2
  /1 id 1-2 workshop 3
  gender 4 (A) q1 5 q2 6
  q3 7 q4 8
  /2 q5 5 q6 6 q7 7 q8 8.
LIST.
clear
#delimit ;
infix 2 lines
  1: id 1-2 workshop 3
     gender 4
     q1 5 q2 6 q3 7 q4 8
  2: q5 5 q6 6 q7 7 q8 8 using mydataFWF.txt;
#delimit cr
list



Reading Excel Files

 R SAS SPSS Stata
# Do this once to install:
install.packages("xlsReadWrite")
library("xlsReadWrite")
xls.getshlib()

# Now read the file:
library("xlsReadWrite")
mydata <- read.xls("mydata.xls")
mydata
PROC IMPORT
       OUT = mydata
  DATAFILE = "mydata.xls"
      DBMS = EXCELCS REPLACE;
  RANGE    = "Sheet1$";
  SCANTEXT = YES;
  USEDATE  = YES;
  SCANTIME = YES;
RUN;
GET DATA
  /TYPE=XLS
  /FILE='mydata.xls'
  /SHEET=name 'Sheet1'
  /CELLRANGE=full
  /READNAMES=on
  /ASSUMEDSTRWIDTH=32767.
EXECUTE.




Reading SAS Files

 R SAS SPSS Stata
library("sas7bdat")
read.sas7bdat("mydata.sas7bdat")






Reading SPSS Files

 R SAS SPSS Stata
library("foreign")
mydata <- read.spss("mydata.sav",
  use.value.labels = TRUE,
  to.data.frame    = TRUE)
mydata






Writing Comma-Delimited Text Files

 R SAS SPSS Stata
write.csv(mydata,
  "mydataFromR.csv")


PROC EXPORT DATA=MYLIB.MYDATA
  OUTFILE="mydataFromSAS.csv"
     DBMS=CSV REPLACE;
 PUTNAMES=YES;
SAVE TRANSLATE
 OUTFILE='mydataFromSPSS.csv'
  /TYPE=CSV
  /MAP
  /REPLACE
  /FIELDNAMES
  /CELLS=VALUES.
use mydata,clear
outfile using mydata.csv, comma
type mydata.csv




Writing Tab-Delimited Text Files

 R SAS SPSS Stata
write.table(mydata,
  file      = "mydataFromR.tab",
  quote     = FALSE,
  sep       = "\t",
  na        = "",
  row.names = TRUE,
  col.names = TRUE)

# Space delimited:
write.table(mydata,
  "mydataFromR.txt")
PROC EXPORT DATA=MYLIB.MYDATA
  OUTFILE="mydataFromSAS.txt"
     DBMS=TAB REPLACE;
 PUTNAMES=YES;
SAVE TRANSLATE
 OUTFILE='mydataFromSPSS.dat'
  /TYPE=TAB
  /MAP
  /REPLACE
  /FIELDNAMES
  /CELLS=VALUES.
use mydata,clear
outfile using mydata.tab, tab
type mydata.tab



Writing Excel Files

 R SAS SPSS Stata
# Do this once:
install.packages("xlsReadWrite")
library("xlsReadWrite")
xls.getshlib()

# Write the file:
library("xlsReadWrite")
write.xls(mydata,
  "mydataFromR.xls")
PROC EXPORT
     DATA=MYDATA
  OUTFILE="mydata.xls"
     DBMS=EXCELCS LABEL
     REPLACE;
     SHEET="mydata";
SAVE TRANSLATE
  OUTFILE='mydataFromSPSS.xls'
  /TYPE=XLS
  /VERSION=2
  /MAP
  /REPLACE
  /FIELDNAMES.




Writing SAS Files

 R SAS SPSS Stata
library("foreign")

write.foreign(mydata,
  datafile = "mydataFromR.txt",
  codefile = "mydataFromR.sas",
  package  = "SAS")






Writing SPSS Files

 R SAS SPSS Stata
library("foreign")

write.foreign(mydata,
  datafile = "mydataFromR.txt",
  codefile = "mydataFromR.sps",
  package  = "SPSS")






Writing Stata Files

 R SAS SPSS Stata
library("foreign")

write.foreign(mydata,
  datafile="mydata2.csv",
  codefile="mydata2.do",
  package="Stata")