merge dataframes and still preserve the original order

Post date: Apr 2, 2014 8:19:07 AM

I copied and pasted the R code below from Tal Galili. All the credits go to the original blog:

http://www.r-statistics.com/2012/01/merging-two-data-frame-objects-while-preserving-the-rows-order/

# ========================================================= # Merge while preserving the order  # ========================================================= merge.with.order <- function(x,y, ..., sort = T, keep_order) {   # this function works just like merge, only that it adds the option to return the merged data.frame ordered by x (1) or by y (2)   add.id.column.to.data <- function(DATA)   {     data.frame(DATA, id... = seq_len(nrow(DATA)))   }   # add.id.column.to.data(data.frame(x = rnorm(5), x2 = rnorm(5)))   order.by.id...and.remove.it <- function(DATA)   {     # gets in a data.frame with the "id..." column.  Orders by it and returns it     if(!any(colnames(DATA)=="id...")) stop("The function order.by.id...and.remove.it only works with data.frame objects which includes the 'id...' order column")       ss_r <- order(DATA$id...)     ss_c <- colnames(DATA) != "id..."     DATA[ss_r, ss_c]       }     # tmp <- function(x) x==1; 1 # why we must check what to do if it is missing or not...   # tmp()     if(!missing(keep_order))   {     if(keep_order == 1) return(order.by.id...and.remove.it(merge(x=add.id.column.to.data(x),y=y,..., sort = FALSE)))     if(keep_order == 2) return(order.by.id...and.remove.it(merge(x=x,y=add.id.column.to.data(y),..., sort = FALSE)))     # if you didn't get "return" by now - issue a warning.     warning("The function merge.with.order only accepts NULL/1/2 values for the keep_order variable")   } else {return(merge(x=x,y=y,..., sort = sort))} }     # ================================================================= #                        Demo # ================================================================= x <- data.frame(   ref = c( 'Ref1', 'Ref2' )   , label = c( 'Label01', 'Label02' ) ) y <- data.frame(   id = c( 'A1', 'C2', 'B3', 'D4' )   , ref = c( 'Ref1', 'Ref2' , 'Ref3','Ref1' )   , val = c( 1.11, 2.22, 3.33, 4.44 ) )   # > x #    ref   label # 1 Ref1 Label01 # 2 Ref2 Label02   # > y #   id  ref  val # 1 A1 Ref1 1.11 # 2 C2 Ref2 2.22 # 3 B3 Ref3 3.33 # 4 D4 Ref1 4.44   merge(x=x, y=y, by='ref', all.y = T, sort=F ) # > merge(x=x, y=y, by='ref', all.y = T, sort=F ) #    ref   label id  val # 1 Ref1 Label01 A1 1.11 # 2 Ref1 Label01 D4 4.44 # 3 Ref2 Label02 C2 2.22 # 4 Ref3    <NA> B3 3.33   merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 1) # > merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 1) #    ref   label id  val # 1 Ref1 Label01 A1 1.11 # 2 Ref1 Label01 D4 4.44 # 3 Ref2 Label02 C2 2.22 # 4 Ref3    <NA> B3 3.33   merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 2) # > merge.with.order( x=x, y=y, by='ref', all.y = T, sort=F ,keep_order = 2) #    ref   label id  val # 1 Ref1 Label01 A1 1.11 # 3 Ref2 Label02 C2 2.22 # 4 Ref3    <NA> B3 3.33 # 2 Ref1 Label01 D4 4.44
Created by Pretty R at inside-R.org

-----