
How to read all files in a directory in HDFS using the Hadoop FileSystem API

The following code reads every file in a directory on an HDFS file system using the Hadoop FileSystem API.
 
1. Open the file cat.java and paste the following code
package org.myorg;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class cat{
        public static void main (String [] args) throws Exception{
                try{
                        // Connect to the file system named in the Hadoop configuration
                        FileSystem fs = FileSystem.get(new Configuration());
                        // List every entry in the input directory
                        FileStatus[] status = fs.listStatus(new Path("hdfs://jp.seka.com:9000/user/jeka/in"));
                        for (int i=0;i<status.length;i++){
                                // Skip subdirectories; only regular files can be opened and read
                                if (status[i].isDir()){
                                        continue;
                                }
                                BufferedReader br=new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                                String line=br.readLine();
                                while (line != null){
                                        System.out.println(line);
                                        line=br.readLine();
                                }
                                br.close();
                        }
                }catch(Exception e){
                        // Report the actual failure instead of assuming a missing file
                        System.out.println("Error reading directory: " + e.getMessage());
                }
        }
}
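 
Note that listStatus does not descend into subdirectories. Hadoop 2.x and later add FileSystem.listFiles(path, true), which returns a recursive iterator over regular files. The following is a minimal sketch of that variant, not part of the steps below; the class name catRecursive and the use of a command-line argument for the directory are illustrative choices, not from the original code.
package org.myorg;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class catRecursive{
        public static void main (String [] args) throws Exception{
                FileSystem fs = FileSystem.get(new Configuration());
                // "true" requests a recursive listing that yields regular files only,
                // so no explicit directory check is needed
                RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(args[0]), true);
                while (it.hasNext()){
                        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(it.next().getPath())));
                        String line;
                        while ((line = br.readLine()) != null){
                                System.out.println(line);
                        }
                        br.close();
                }
        }
}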
 
2. Compile the code (create the output directory for the class files first, since javac -d does not create it)
mkdir cat
javac -classpath hadoop-0.20.1-dev-core.jar -d cat/ cat.java
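On newer Hadoop releases the core classes ship under a different jar name; assuming the hadoop command is on your PATH, it can supply the full classpath instead:
javac -classpath "$(hadoop classpath)" -d cat/ cat.java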
 
3. Create jar
jar -cvf cat.jar -C cat/ .
 
4. Run
hadoop jar cat.jar org.myorg.cat
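If the run succeeds, the contents of every regular file in the directory are printed to standard output, one file after another, in the order listStatus returns them.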
 