Sources: http://hadoop.apache.org/
Fundamentals:
Operations:
Setting up a Hadoop Cluster: see hadoop-setup
Administering Hadoop:
Related Projects:
Data Formats: Avro, Parquet
Data Ingestion: Flume, Sqoop
Data Processing: Pig, Spark, Hive, Crunch
Storage: HBase
Coordination: ZooKeeper
CDH (Cloudera Manager): http://<HOST>:7180/cmf/home
#Hadoop Console commands:
hadoop fs -ls /                                      #list the contents of '/'
hdfs dfs -mkdir -p /user/rohit                       #create a directory (and any missing parents)
hdfs dfs -put <SOURCE_FILE> <DESTINATION_ON_HDFS>    #copy a local file into HDFS
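#A few more everyday HDFS commands for reference (file/dir names below are placeholders, not from the original notes):
hdfs dfs -cat /user/rohit/<FILE>                     #print a file's contents to stdout
hdfs dfs -get /user/rohit/<FILE> .                   #copy a file from HDFS to the local working directory
hdfs dfs -rm -r /user/rohit/<DIR>                    #delete a directory recursively
hdfs dfs -du -h /user/rohit                          #show space used, human-readable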
#Hadoop setup for S3 access: for a public bucket, the AnonymousAWSCredentialsProvider class is used.
hadoop fs -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider -ls s3a://<BUCKET_NAME>/
**This initially fails with a ClassNotFoundException because the hadoop-aws classes are not on the classpath.
Soln: create ~/.hadooprc with the line below
hadoop_add_to_classpath_tools hadoop-aws
Also set JAVA_HOME and export HADOOP_OPTIONAL_TOOLS="hadoop-aws" in hadoop-env.sh
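A minimal sketch of the two files; the JAVA_HOME path is an assumption (an OpenJDK 8 install), adjust it for your machine:
# ~/.hadooprc
hadoop_add_to_classpath_tools hadoop-aws

# etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64   #assumed path, not from the original notes
export HADOOP_OPTIONAL_TOOLS="hadoop-aws"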
#After setting the properties below in core-site.xml, ls can be run directly against any bucket
<property>
  <name>fs.s3a.access.key</name>
  <value></value>
</property>
<property>
  <name>fs.s3a.secret.key</name>
  <value></value>
</property>
<property>
  <name>fs.s3.awsAccessKeyId</name>
  <value></value>
</property>
<property>
  <name>fs.s3.awsSecretAccessKey</name>
  <value></value>
</property>
<property>
  <name>fs.s3n.awsAccessKeyId</name>
  <value></value>
</property>
<property>
  <name>fs.s3n.awsSecretAccessKey</name>
  <value></value>
</property>
<property>
  <name>mumbai.endpoint</name>
  <value>s3.dualstack.ap-south-1.amazonaws.com</value>
</property>
<property>
  <name>fs.s3n.bucket.bigscale-flume.endpoint</name>
  <value>${mumbai.endpoint}</value>
</property>
<property>
  <name>fs.s3a.fast.upload.buffer</name>
  <value>disk</value>
</property>
<property>
  <name>fs.s3a.multipart.size</name>
  <value>100M</value>
</property>
<property>
  <name>fs.s3a.fast.upload.active.blocks</name>
  <value>8</value>
</property>
<property>
  <name>fs.s3a.aws.credentials.provider</name>
  <value>
    org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,
    com.amazonaws.auth.EnvironmentVariableCredentialsProvider,
    com.amazonaws.auth.InstanceProfileCredentialsProvider
  </value>
</property>
hadoop fs -ls s3a://<BUCKET_NAME>/                   #works now without -D overrides, since credentials come from core-site.xml
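Once S3A access works, the same setup lets you move data between HDFS and S3; a short usage sketch (bucket and paths are placeholders):
hadoop distcp hdfs:///user/rohit/<DIR> s3a://<BUCKET_NAME>/<DIR>   #bulk-copy a directory from HDFS to S3
hadoop fs -cp s3a://<BUCKET_NAME>/<FILE> /user/rohit/              #copy a single object from S3 into HDFS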