merge dataframes and still preserve the original order
Post date: Apr 2, 2014 8:19:07 AM
I copied and pasted the R code below from Tal Galili. All the credits go to the original blog:
http://www.r-statistics.com/2012/01/merging-two-data-frame-objects-while-preserving-the-rows-order/
# ========================================================= # Merge while preserving the order # ========================================================= merge.with.order <- function(x,y, ..., sort = T, keep_order) { # this function works just like merge, only that it adds the option to return the merged data.frame ordered by x (1) or by y (2) add.id.column.to.data <- function(DATA) { data.frame(DATA, id... = seq_len(nrow(DATA))) } # add.id.column.to.data(data.frame(x = rnorm(5), x2 = rnorm(5))) order.by.id...and.remove.it <- function(DATA) { # gets in a data.frame with the "id..." column. Orders by it and returns it if(!any(colnames(DATA)=="id...")) stop("The function order.by.id...and.remove.it only works with data.frame objects which includes the 'id...' order column") ss_r <- order(DATA$id...) ss_c <- colnames(DATA) != "id..." DATA[ss_r, ss_c] } # tmp <- function(x) x==1; 1 # why we must check what to do if it is missing or not... # tmp() if(!missing(keep_order)) { if(keep_order == 1) return(order.by.id...and.remove.it(merge(x=add.id.column.to.data(x),y=y,..., sort = FALSE))) if(keep_order == 2) return(order.by.id...and.remove.it(merge(x=x,y=add.id.column.to.data(y),..., sort = FALSE))) # if you didn't get "return" by now - issue a warning. warning("The function merge.with.order only accepts NULL/1/2 values for the keep_order variable") } else {return(merge(x=x,y=y,..., sort = sort))} } # ================================================================= # Demo # ================================================================= x <- data.frame( ref = c( 'Ref1', 'Ref2' ) , label = c( 'Label01', 'Label02' ) ) y <- data.frame( id = c( 'A1', 'C2', 'B3', 'D4' ) , ref = c( 'Ref1', 'Ref2' , 'Ref3','Ref1' ) , val = c( 1.11, 2.22, 3.33, 4.44 ) ) # > x # ref label # 1 Ref1 Label01 # 2 Ref2 Label02 # > y # id ref val # 1 A1 Ref1 1.11 # 2 C2 Ref2 2.22 # 3 B3 Ref3 3.33 # 4 D4 Ref1 4.44 merge(x=x, y=y, by='ref', all.y = T, sort=F ) # > merge(x=x, y=y, by='ref', all.y = T, sort=F ) # ref label id val # 1 Ref1 Label01 A1 1.11 # 2 Ref1 Label01 D4 4.44 # 3 Ref2 Label02 C2 2.22 # 4 Ref3 <NA> B3 3.33 merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 1) # > merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 1) # ref label id val # 1 Ref1 Label01 A1 1.11 # 2 Ref1 Label01 D4 4.44 # 3 Ref2 Label02 C2 2.22 # 4 Ref3 <NA> B3 3.33 merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 2) # > merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 2) # ref label id val # 1 Ref1 Label01 A1 1.11 # 3 Ref2 Label02 C2 2.22 # 4 Ref3 <NA> B3 3.33 # 2 Ref1 Label01 D4 4.44
Created by Pretty R at inside-R.org
-----