Below is a
comparison of the commands used to read data into
in R, SAS, SPSS and Stata. The variables gender and workshop are
categorical factors and q1 to q4, pretest and posttest are considered
continuous and normally distributed. The practice data set is shown here. The programs and the data they use are also available for download here. Examples that are missing in the Stata column reflect differences between the two books. We will get around to those when we write the second edition of R for Stata Users.Reading a comma-delimited text file
| R | SAS | SPSS | Stata | # With id variable not named.
mydata <- read.csv("mydata.csv")
# With id named in the header. mydata <- read.csv("mydataID.csv", row.names = "id")
| PROC IMPORT OUT=myLib.mydata DATAFILE="mydataID.csv" DBMS=CSV REPLACE; GETNAMES=YES; DATAROW=2; RUN; PROC PRINT; RUN;
| GET DATA /TYPE=TXT /FILE='mydataID.csv' /DELCASE=LINE /DELIMITERS="," /ARRANGEMENT=DELIMITED /FIRSTCASE=2 /IMPORTCASE=ALL /VARIABLES=id F1.0 workshop F1.0 gender A1.0 q1 F1.0 q2 F1.0 q3 F1.0 q4 F1.0 . LIST. SAVE OUTFILE='mydata.sav'.
| clear insheet using "mydata.csv", comma names list
|
Reading a tab-delimited text file
| R | SAS | SPSS | Stata | # With id variable not named. mydata <-read.delim("mydata.tab") mydata
# With ID named in the header. mydata <- read.delim("mydataID.tab", row.names = "id") mydata
| PROC IMPORT OUT=myLib.mydata DATAFILE="mydataID.tab" DBMS=TAB REPLACE; GETNAMES=YES; DATAROW=2; RUN; PROC PRINT; RUN;
| GET DATA /TYPE=TXT /FILE="mydataID.tab" /DELCASE=LINE /DELIMITERS="\t" /ARRANGEMENT=DELIMITED /FIRSTCASE=2 /IMPORTCASE=ALL /VARIABLES = id F1.0 workshop F1.0 gender A1.0 q1 F1.0 q2 F1.0 q3 F1.0 q4 F1.0 . LIST. EXECUTE. DATASET NAME DataSet1 WINDOW=FRONT.
| clear insheet using "mydataID.tab", tab names list
|
Reading a comma-delimited text file from a web site
| R | SAS | SPSS | Stata | myURL <- "http://sites.google.com/site/r4statistics/mydata.csv" mydata <- read.csv(myURL) mydata
| FILENAME myURL URL "http://sites.google.com/site/r4statistics/mydataID.csv"; PROC IMPORT DATAFILE= myURL DBMS=CSV REPLACE OUT= myLib.mydata; GETNAMES=YES; DATAROW=2; RUN; PROC PRINT; RUN;
| SPSS cannot read text files from a URL directly. It can read SPSS, SAS, Excel, and Stata files from a URL through the SPSSINC GETURI DATA extension.
|
|
Reading data within a program
| R | SAS | SPSS | Stata | # This works interactively # See book for alternative # approach for sourced files.
mydata <- read.csv( stdin() ) workshop,gender,q1,q2,q3,q4 1,1,f,1,1,5,1 2,2,f,2,1,4,1 3,1,f,2,2,4,3 4,2,NA,3,1,NA,3 5,1,m,4,5,2,4 6,2,m,5,4,5,5 7,1,m,5,3,4,4 8,2,m,4,5,5,5
# Blank line above ends input.
| LIBNAME myLib 'C:\myRfolder'; DATA myLib.mydata; INFILE DATALINES DELIMITER = ',' MISSOVER DSD firstobs=2 ; INPUT id workshop gender $ q1 q2 q3 q4; DATALINES; id,workshop,gender,q1,q2,q3,q4 1,1,f,1,1,5,1 2,2,f,2,1,4,1 3,1,f,2,2,4,3 4,2, ,3,1, ,3 5,1,m,4,5,2,4 6,2,m,5,4,5,5 7,1,m,5,3,4,4 8,2,m,4,5,5,5 PROC PRINT; RUN;
| DATA LIST / id 2 workshop 4 gender 6 (A) q1 8 q2 10 q3 12 q4 14. BEGIN DATA. 1,1,f,1,1,5,1 2,2,f,2,1,4,1 3,1,f,2,2,4,3 4,2, ,3,1, ,3 5,1,m,4,5,2,4 6,2,m,5,4,5,5 7,1,m,5,3,4,4 8,2,m,4,5,5,5 END DATA. LIST. SAVE OUTFILE='C:\myRfolder\mydata.sav'.
| clear all input id workshop str1 gender q1-q4, automatic 1 1 f 1 1 5 1 2 2 f 2 1 4 1 3 1 f 2 2 4 3 4 2 . 3 1 . 3 5 1 m 4 5 2 4 6 2 m 5 4 5 5 7 1 m 5 3 4 4 8 2 m 4 5 5 5 end list
|
Reading Multiple Observations per Line
| R | SAS | SPSS | Stata | # This works interactively
# See book for alternative
# approach for sourced files.
mylist <- scan( stdin(), what = list( id = 0, workshop = 0, gender = "", q1 = 0, q2 = 0, q3 = 0, q4 = 0)) 1 1 f 1 1 5 1 2 2 f 2 1 4 1 3 1 f 2 2 4 3 4 2 NA 3 1 NA 3 5 1 m 4 5 2 4 6 2 m 5 4 5 5 7 1 m 5 3 4 4 8 2 m 4 5 5 5
# Blank line above ends input. mylist
mydata <- data.frame(mylist) mydata
| DATA mydata; INPUT id workshop gender $ q1-q4 @@; DATALINES; 1 1 f 1 1 5 1 2 2 f 2 1 4 1 3 1 f 2 2 4 3 4 2 . 3 1 . 3 5 1 m 4 5 2 4 6 2 m 5 4 5 5 7 1 m 5 3 4 4 8 2 m 4 5 5 5 ; PROC PRINT; RUN;
| DATA LIST FREE/ id (f1.0) workshop (f1.0) gender (A) q1 (f1.0) q2 (f1.0) q3 (f1.0) q4 (f1.0).
BEGIN DATA. 1,1,f,1,1,5,1, 2,2,f,2,1,4,1 3,1,f,2,2,4,3, 4,2, ,3,1, ,3 5,1,m,4,5,2,4, 6,2,m,5,4,5,5 7,1,m,5,3,4,4, 8,2,m,4,5,5,5 END DATA.
LIST.
|
|
Reading Fixed-Width Text Files, One Record per Case
| R | SAS | SPSS | Stata | myfile <- "mydataFWF.txt" myVarNames <- c("id", "gender", "q1", "q2", "q3", "q4") myVarWidths <- c(2, -1, 1, 1, 1, 1, 1)
mydata <- read.fwf( file = myfile, width = myVarWidths, col.names = myVarNames, row.names = "id", na.strings = "", fill = TRUE, strip.white = TRUE) mydata
| DATA myLib.mydata; INFILE 'mydataFWF.txt' MISSOVER; INPUT id 1-2 workshop 3 gender $ 4 q1 5 q2 6 q3 7 q4 8; RUN; | DATA LIST FILE='mydataFWF.txt' RECORDS=1 /1 id 1-2 workshop 3 gender 4 (A) q1 5 q2 6 q3 7 q4 8. LIST. | infix id 1-2 workshop 3 gender 4 q1 5 q2 6 q3 7 q4 8 /// using mydataFWF.txt, clear list
|
Reading Fixed-Width Text Files, Two Records per Case
| R | SAS | SPSS | Stata | myfile <- "mydataFWF.txt" myVarNames <- c("id", "workshop", "gender", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8") myRecord1Widths <- c( 2, 1, 1, 1, 1, 1, 1) myRecord2Widths <- c(-2,-1,-1, 1, 1, 1, 1) myVarWidths <- list(myRecord1Widths, myRecord2Widths)
mydata <- read.fwf( file = myfile, width = myVarWidths, col.names = myVarNames, row.names = "id", na.strings = "", fill = TRUE, strip.white = TRUE ) mydata
| DATA temp; INFILE 'mydataFWF.txt' MISSOVER; INPUT #1 id 1-2 workshop 3 gender 4 q1 5 q2 6 q3 7 q4 8 #2 q5 5 q6 6 q7 7 q8 8; PROC PRINT; RUN;
| DATA LIST FILE='mydataFWF.txt' RECORDS=2 /1 id 1-2 workshop 3 gender 4 (A) q1 5 q2 6 q3 7 q4 8 /2 q5 5 q6 6 q7 7 q8 8. LIST.
| clear #delimit ; infix 2 lines 1: id 1-2 workshop 3 gender 4 q1 5 q2 6 q3 7 q4 8 2: q5 5 q6 6 q7 7 q8 8 using mydataFWF.txt; #delimit cr list
|
Reading Excel Files
| R | SAS | SPSS | Stata | # Do this once to install: install.packages("xlsReadWrite") library("xlsReadWrite") xls.getshlib()
# Now read the file: library("xlsReadWrite") mydata <- read.xls("mydata.xls") mydata
| PROC IMPORT OUT = mydata DATAFILE = "mydata.xls" DBMS = EXCELCS REPLACE; RANGE = "Sheet1$"; SCANTEXT = YES; USEDATE = YES; SCANTIME = YES; RUN;
| GET DATA /TYPE=XLS /FILE='mydata.xls' /SHEET=name 'Sheet1' /CELLRANGE=full /READNAMES=on /ASSUMEDSTRWIDTH=32767. EXECUTE.
|
|
Reading SAS Files
| R | SAS | SPSS | Stata | library("sas7bdat") read.sas7bdat("mydata.sas7bdat")
|
|
|
|
Reading SPSS Files
| R | SAS | SPSS | Stata | library("foreign") mydata <- read.spss("mydata.sav", use.value.labels = TRUE, to.data.frame = TRUE) mydata
|
|
|
|
Writing Comma-Delimited Text Files
| R | SAS | SPSS | Stata | write.csv(mydata, "mydataFromR.csv")
| PROC EXPORT DATA=MYLIB.MYDATA OUTFILE="mydataFromSAS.csv" DBMS=CSV REPLACE; PUTNAMES=YES;
| SAVE TRANSLATE OUTFILE='mydataFromSPSS.csv' /TYPE=CSV /MAP /REPLACE /FIELDNAMES /CELLS=VALUES.
| use mydata,clear outfile using mydata.csv, comma type mydata.csv
|
Writing Tab-Delimited Text Files
| R | SAS | SPSS | Stata | write.table(mydata, file = "mydataFromR.tab", quote = FALSE, sep = "\t", na = "", row.names = TRUE, col.names = TRUE)
# Space delimited:
write.table(mydata, "mydataFromR.txt")
| PROC EXPORT DATA=MYLIB.MYDATA OUTFILE="mydataFromSAS.txt" DBMS=TAB REPLACE; PUTNAMES=YES;
| SAVE TRANSLATE OUTFILE='mydataFromSPSS.dat' /TYPE=TAB /MAP /REPLACE /FIELDNAMES /CELLS=VALUES.
| use mydata,clear
outfile using mydata.tab, tab
type mydata.tab |
Writing Excel Files
| R | SAS | SPSS | Stata | # Do this once: install.packages("xlsReadWrite") library("xlsReadWrite") xls.getshlib()
# Write the file: library("xlsReadWrite") write.xls(mydata, "mydataFromR.xls")
| PROC EXPORT DATA=MYDATA OUTFILE="mydata.xls" DBMS=EXCELCS LABEL REPLACE; SHEET="mydata";
| SAVE TRANSLATE OUTFILE='mydataFromSPSS.xls' /TYPE=XLS /VERSION=2 /MAP /REPLACE /FIELDNAMES.
|
|
Writing SAS Files
| R | SAS | SPSS | Stata | library("foreign")
write.foreign(mydata, datafile = "mydataFromR.txt", codefile = "mydataFromR.sas", package = "SAS")
|
|
|
|
Writing SPSS Files| R | SAS | SPSS | Stata | library("foreign")
write.foreign(mydata, datafile = "mydataFromR.txt", codefile = "mydataFromR.sps", package = "SPSS")
|
|
|
| Writing Stata Files| R | SAS | SPSS | Stata | library("foreign")
write.foreign(mydata, datafile="mydata2.csv", codefile="mydata2.do", package="Stata") | | | |
|