CDH Manual Installation on a Single Node

Step-by-step guide to installing Cloudera's Distribution including Apache Hadoop (CDH) on a single node running CentOS 7. The process is completely manual.

# yum install java

# java -version
openjdk version "1.8.0_181"
OpenJDK Runtime Environment (build 1.8.0_181-b13)
OpenJDK 64-Bit Server VM (build 25.181-b13, mixed mode)

# useradd -g hadoop hadoop

# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password contains the user name in some form
Retype new password:
passwd: all authentication tokens updated successfully.

# su - hadoop

$ ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
The key's randomart image is:
$ cat ~/.ssh/id_rsa.pub
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkk39QsAbOJwMVy7AenkzYg6nR9YDhVaqwUna9bR2Tu8XsHUYy+x9TiWtrj+3awAb8uuMHqV3Q+aW2Pe3FEwJvWRv0j3tqhYD9UgM4blMAEdHnLhj0bO+AT+y3yhxCcb+7+WEgv1B0pz9HaD8D4NDiYIizUktcetYT3SswS3vZxJrSDr5BsepnHQDMcAgF4xpZwXIbJaoNuQIlO93+L/2OiIqlsjb2BKiBf5EoNTFZt9lJVvCyiV29ujH2eb6zktyIn+D7Z/591Esk1zEcIwIjsjvTJFpb9HXYHJqQhAYC0VrHNRnzRRyGJguzGkFUK8ppTbI7P+W29KBDbpfg7T8z

$ cd ~/.ssh/
$ ls
$ touch authorized_keys
$ vi authorized_keys
### Paste Above content ###
$ chmod 0600 authorized_keys

Downloaded Java from the Oracle site and installed it
# yum localinstall jdk-8u172-linux-x64.rpm

  jdk1.8.x86_64 2000:1.8.0_172-fcs


# export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64/

# sudo yum clean all; sudo yum install hadoop-yarn-resourcemanager
# sudo yum clean all; sudo yum install hadoop-hdfs-namenode
# sudo yum clean all; sudo yum install hadoop-hdfs-secondarynamenode
# sudo yum clean all; sudo yum install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce -y
# sudo yum clean all; sudo yum install hadoop-mapreduce-historyserver hadoop-yarn-proxyserver -y 
# sudo yum clean all; sudo yum install hadoop-client

--Disable Firewall
systemctl stop firewalld iptables ip6tables
systemctl disable firewalld iptables ip6tables

service iptables stop
service ip6tables stop

chkconfig iptables off
chkconfig ip6tables off

--Disable SELinux 
Edit the file /etc/selinux/config and set SELINUX=disabled


Note: A reboot is required for the change to take effect

root@hyd-hadoop-test:~# cp -r /etc/hadoop/conf.empty/ /etc/hadoop/conf.my_cluster

root@hyd-hadoop-test:~# sudo alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.my_cluster 50

root@hyd-hadoop-test:~# sudo alternatives --set hadoop-conf /etc/hadoop/conf.my_cluster

root@hyd-hadoop-test:~# sudo alternatives --display hadoop-conf
hadoop-conf - status is manual.
 link currently points to /etc/hadoop/conf.my_cluster
/etc/hadoop/conf.empty - priority 10
/etc/hadoop/conf.impala - priority 5
/etc/hadoop/conf.my_cluster - priority 50
Current `best' version is /etc/hadoop/conf.my_cluster.

Edit the configuration file and add
# cat /etc/hadoop/conf.my_cluster/core-site.xml

          <description>NameNode URI</description>




# cat /etc/hadoop/conf.my_cluster/hdfs-site.xml





root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/name

root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/data

root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/data/

root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/name/

root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/name

root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/data/

Format the NameNode
# sudo -u hdfs hdfs namenode -format

18/08/14 12:53:13 INFO namenode.FSImage: Allocated new BlockPoolId: BP-90611475-
18/08/14 12:53:13 INFO common.Storage: Storage directory /data/hadoop-hdfs/cache/hdfs/dfs/name has been successfully formatted.
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Saving image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 317 bytes saved in 0 seconds.
18/08/14 12:53:13 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
18/08/14 12:53:13 INFO util.ExitUtil: Exiting with status 0
18/08/14 12:53:13 INFO namenode.NameNode: SHUTDOWN_MSG:
SHUTDOWN_MSG: Shutting down NameNode at hyd-hadoop-test/

Start HDFS
root@hyd-hadoop-test:~# for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
starting datanode, logging to /var/log/hadoop-hdfs/
Started Hadoop datanode (hadoop-hdfs-datanode):            [  OK  ]
starting namenode, logging to /var/log/hadoop-hdfs/
Started Hadoop namenode:                                   [  OK  ]
starting secondarynamenode, logging to /var/log/hadoop-hdfs/
Started Hadoop secondarynamenode:                          [  OK  ]

# sudo -u hdfs hadoop fs -mkdir /tmp

# sudo -u hdfs hadoop fs -chmod -R 1777 /tmp

sudo -u hdfs hadoop fs -ls /

root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/mapred-site.xml





root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/yarn-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>




    <description>List of directories to store localized files in.</description>

    <description>Where to store container logs.</description>

    <description>Where to aggregate logs to.</description>

    <description>Classpath for typical applications.</description>



Adding Environment Variables for Hadoop components
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_COMMON_HOME=/usr/lib/hadoop

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_HDFS_HOME=/usr/lib/hadoop/hadoop-hdfs

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_CONF_DIR=/usr/lib/hadoop/etc/hadoop

root@hyd-hadoop-test:~# mkdir -p /data/yarn/local
root@hyd-hadoop-test:~# mkdir -p /data/yarn/logs

root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/local
root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/logs/

root@hyd-hadoop-test:~# sudo service hadoop-yarn-resourcemanager start
starting resourcemanager, logging to /var/log/hadoop-yarn/
Started Hadoop resourcemanager:                            [  OK  ]
root@hyd-hadoop-test:~# sudo service hadoop-yarn-nodemanager start
starting nodemanager, logging to /var/log/hadoop-yarn/
Started Hadoop nodemanager:                                [  OK  ]
root@hyd-hadoop-test:~# sudo service hadoop-mapreduce-historyserver start
starting historyserver, logging to /var/log/hadoop-mapreduce/
18/08/14 15:12:17 INFO hs.JobHistoryServer: STARTUP_MSG:
STARTUP_MSG: Starting JobHistoryServer
STARTUP_MSG:   user = mapred
STARTUP_MSG:   host = hyd-hadoop-test/
STARTUP_MSG:   args = []
STARTUP_MSG:   version = 2.6.0-cdh5.15.0
STARTUP_MSG:   build = -r e3cb23a1cb2b89d074171b44e71f207c3d6ffa50; compiled by 'jenkins' on 2018-05-24T11:19Z
STARTUP_MSG:   java = 1.8.0_181
Started Hadoop historyserver:                              [  OK  ]

root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/hdfs

root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/yarn

root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -chown yarn:yarn /user/yarn

## Install all CDH components
yum install hive hive-metastore hive-server2 hive-hbase whirr sqoop2-server sqoop2-client sqoop spark-core spark-master spark-worker spark-history-server spark-python hbase-solr-indexer hbase-solr-doc solr-mapreduce solr-crunch solr-server pig mahout hadoop-kms hadoop-kms-server impala*  hue hadoop-httpfs oozie

Hue Configuration
# vi /etc/hadoop/conf.my_cluster/core-site.xml
<!-- Hue WebHDFS proxy user setting -->

Oozie configuration
# vi /etc/oozie/conf/oozie-site.xml


Add Server IP Address and Port Number
# vi /etc/hue/conf.empty/hue.ini

## Webserver listens on this address and port 

Beeswax configuration

  # Host where HiveServer2 is running.
  # If Kerberos security is enabled, use fully-qualified domain name (FQDN).

  # Port where HiveServer2 Thrift server runs on.

  # Hive configuration directory, where hive-site.xml is located

  # Timeout in seconds for thrift calls to Hive service
  ## server_conn_timeout=120

  # Host of the Impala Server (one of the Impalad)

  # Port of the Impala Server

  # Kerberos principal
  ## impala_principal=impala/

  # Turn on/off impersonation mechanism when talking to Impala

### Database configuration

To connect to a database, install MariaDB on CentOS 7
yum install mariadb* -y 

# mysql -u root -ptemp

MariaDB [(none)]> create database mysqldb;
Query OK, 1 row affected (0.00 sec)

MariaDB [(none)]> show databases;
| Database           |
| information_schema |
| mysql              |
| mysqldb            |
| performance_schema |
4 rows in set (0.00 sec)

MariaDB [(none)]> exit

root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 status
Hive Server2 is not running                                [FAILED]
root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 restart
Stopped Hive Server2:                                      [  OK  ]
Started Hive Server2 (hive-server2):                       [  OK  ]
root@hyd-hadoop-test:/usr/lib/hue/pids# systemctl enable hive-server2
hive-server2.service is not a native service, redirecting to /sbin/chkconfig.
Executing /sbin/chkconfig hive-server2 on