CDH Manual installation in single Node

Cloudera Distribution of Apache Hadoop (CDH) single-node installation: a step-by-step guide for CentOS 7. Completely manual process.

# yum install java

# java -version
openjdk version "1.8.0_181"
OpenJDK Runtime Environment (build 1.8.0_181-b13)
OpenJDK 64-Bit Server VM (build 25.181-b13, mixed mode)

# groupadd hadoop
# useradd -g hadoop hadoop

# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password contains the user name in some form
Retype new password:
passwd: all authentication tokens updated successfully.

# su - hadoop

$ ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:n4d58SIkG9mWM8P6QS8YjxCNxbkhaYKhCNJwFH8a3A8 hadoop@hyd-hyd-hadoop-test.arkit.co.in
The key's randomart image is:
+---[RSA 2048]----+
|++=+   o..       |
|+ooo..+++        |
|o   +oE..o       |
|     + +.+ .     |
|    . . S @ .    |
|       . # O o   |
|        = X = .  |
|         . * .   |
|          .      |
+----[SHA256]-----+

$ cat ~/.ssh/id_rsa.pub
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkk39QsAbOJwMVy7AenkzYg6nR9YDhVaqwUna9bR2Tu8XsHUYy+x9TiWtrj+3awAb8uuMHqV3Q+aW2Pe3FEwJvWRv0j3tqhYD9UgM4blMAEdHnLhj0bO+AT+y3yhxCcb+7+WEgv1B0pz9HaD8D4NDiYIizUktcetYT3SswS3vZxJrSDr5BsepnHQDMcAgF4xpZwXIbJaoNuQIlO93+L/2OiIqlsjb2BKiBf5EoNTFZt9lJVvCyiV29ujH2eb6zktyIn+D7Z/591Esk1zEcIwIjsjvTJFpb9HXYHJqQhAYC0VrHNRnzRRyGJguzGkFUK8ppTbI7P+W29KBDbpfg7T8z hadoop@hyd-hadoop-test.arkit.co.in

$ cd ~/.ssh/
$ ls
id_rsa  id_rsa.pub
$ touch authorized_keys
$ vi authorized_keys
### Paste Above id_rsa.pub content ###
$ chmod 0600 authorized_keys

Downloaded Java from the Oracle site and installed it
# yum localinstall jdk-8u172-linux-x64.rpm

Installed:
  jdk1.8.x86_64 2000:1.8.0_172-fcs

Complete!

# export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64/



# sudo yum clean all; sudo yum install hadoop-yarn-resourcemanager
# sudo yum clean all; sudo yum install hadoop-hdfs-namenode
# sudo yum clean all; sudo yum install hadoop-hdfs-secondarynamenode
# sudo yum clean all; sudo yum install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce -y
# sudo yum clean all; sudo yum install hadoop-mapreduce-historyserver hadoop-yarn-proxyserver -y 
# sudo yum clean all; sudo yum install hadoop-client

--Disable Firewall
systemctl stop firewalld
systemctl disable firewalld

service iptables stop
service ip6tables stop

chkconfig iptables off
chkconfig ip6tables off

--Disable SELinux 
Edit file /etc/selinux/config

SELINUX=disabled

Note: Reboot is required to take effect

root@hyd-hadoop-test:~# cp -r /etc/hadoop/conf.empty/ /etc/hadoop/conf.my_cluster

root@hyd-hadoop-test:~# sudo alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.my_cluster 50

root@hyd-hadoop-test:~# sudo alternatives --set hadoop-conf /etc/hadoop/conf.my_cluster


root@hyd-hadoop-test:~# sudo alternatives --display hadoop-conf
hadoop-conf - status is manual.
 link currently points to /etc/hadoop/conf.my_cluster
/etc/hadoop/conf.empty - priority 10
/etc/hadoop/conf.impala - priority 5
/etc/hadoop/conf.my_cluster - priority 50
Current `best' version is /etc/hadoop/conf.my_cluster.

Edit the configuration file and add
# cat /etc/hadoop/conf.my_cluster/core-site.xml

<configuration>
        <property>
          <name>fs.defaultFS</name>
          <value>hdfs://hyd-hadoop-test:8020</value>
          <description>NameNode URI</description>
        </property>

        <property>
          <name>fs.trash.interval</name>
          <value>30</value>
        </property>

        <property>
          <name>fs.trash.checkpoint.interval</name>
          <!-- must be less than or equal to fs.trash.interval -->
          <value>30</value>
        </property>

</configuration>

# cat /etc/hadoop/conf.my_cluster/hdfs-site.xml
<configuration>
  <property>
     <name>dfs.namenode.name.dir</name>
     <value>file:///data/hadoop-hdfs/cache/hdfs/dfs/name</value>
  </property>

  <property>
     <name>dfs.datanode.data.dir</name>
     <value>file:///data/hadoop-hdfs/cache/hdfs/dfs/data</value>
  </property>


  <property>
     <name>dfs.permissions.superusergroup</name>
     <value>hadoop</value>
  </property>

  <property>
     <name>dfs.webhdfs.enabled</name>
     <value>true</value>
  </property>

</configuration>


root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/name

root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/data

root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/data/

root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/name/


root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/name

root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/data/


format namenode
# sudo -u hdfs hdfs namenode -format

18/08/14 12:53:13 INFO namenode.FSImage: Allocated new BlockPoolId: BP-90611475-10.103.2.104-1534231393025
18/08/14 12:53:13 INFO common.Storage: Storage directory /data/hadoop-hdfs/cache/hdfs/dfs/name has been successfully formatted.
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Saving image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 317 bytes saved in 0 seconds.
18/08/14 12:53:13 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
18/08/14 12:53:13 INFO util.ExitUtil: Exiting with status 0
18/08/14 12:53:13 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hyd-hadoop-test/192.168.2.5
************************************************************/

Start HDFS
root@hyd-hadoop-test:~# for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
starting datanode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-datanode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop datanode (hadoop-hdfs-datanode):            [  OK  ]
starting namenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop namenode:                                   [  OK  ]
starting secondarynamenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-secondarynamenode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop secondarynamenode:                          [  OK  ]


# sudo -u hdfs hadoop fs -mkdir /tmp

# sudo -u hdfs hadoop fs -chmod -R 1777 /tmp

sudo -u hdfs hadoop fs -ls /

root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/mapred-site.xml

<configuration>
        <property>
          <name>mapreduce.framework.name</name>
          <value>yarn</value>
        </property>

        <property>
          <name>mapreduce.jobhistory.address</name>
          <value>hyd-hadoop-test.arkit.co.in:10020</value>
        </property>

        <property>
          <name>mapreduce.jobhistory.webapp.address</name>
          <value>hyd-hadoop-test.arkit.co.in:19888</value>
        </property>


</configuration>

root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/yarn-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>

  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <property>
    <description>List of directories to store localized files in.</description>
    <name>yarn.nodemanager.local-dirs</name>
    <value>file:///var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
  </property>

  <property>
    <description>Where to store container logs.</description>
    <name>yarn.nodemanager.log-dirs</name>
    <value>file:///var/log/hadoop-yarn/containers</value>
  </property>

  <property>
    <description>Where to aggregate logs to.</description>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>hdfs://hyd-hadoop-test:8020/var/log/hadoop-yarn/apps</value>
  </property>

  <property>
    <description>Classpath for typical applications.</description>
     <name>yarn.application.classpath</name>
     <value>
        $HADOOP_CONF_DIR,
        $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
        $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
        $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
        $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
     </value>
  </property>


  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>hyd-hadoop-test.arkit.co.in:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>hyd-hadoop-test.arkit.co.in:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>hyd-hadoop-test.arkit.co.in:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>hyd-hadoop-test.arkit.co.in:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>hyd-hadoop-test.arkit.co.in:8088</value>
   </property>
  <!-- NOTE: this duplicates the yarn.log-aggregation-enable setting above -->
  <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
   </property>

</configuration>

Adding Environment Variables for each Hadoop component
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_COMMON_HOME=/usr/lib/hadoop

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_HDFS_HOME=/usr/lib/hadoop-hdfs

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn

root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_CONF_DIR=/usr/lib/hadoop/etc/hadoop


root@hyd-hadoop-test:~# mkdir -p /data/yarn/local
--date/time-->  08/14/18 15:02:35
root@hyd-hadoop-test:~# mkdir -p /data/yarn/logs

root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/local
--date/time-->  08/14/18 15:03:11
root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/logs/

root@hyd-hadoop-test:~# sudo service hadoop-yarn-resourcemanager start
starting resourcemanager, logging to /var/log/hadoop-yarn/yarn-yarn-resourcemanager-hyd-hadoop-test.arkit.co.in.out
Started Hadoop resourcemanager:                            [  OK  ]
--date/time-->  08/14/18 15:11:56
root@hyd-hadoop-test:~# sudo service hadoop-yarn-nodemanager start
starting nodemanager, logging to /var/log/hadoop-yarn/yarn-yarn-nodemanager-hyd-hadoop-test.arkit.co.in.out
Started Hadoop nodemanager:                                [  OK  ]
--date/time-->  08/14/18 15:12:10
root@hyd-hadoop-test:~# sudo service hadoop-mapreduce-historyserver start
starting historyserver, logging to /var/log/hadoop-mapreduce/mapred-mapred-historyserver-hyd-hadoop-test.arkit.co.in.out
18/08/14 15:12:17 INFO hs.JobHistoryServer: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting JobHistoryServer
STARTUP_MSG:   user = mapred
STARTUP_MSG:   host = hyd-hadoop-test/192.168.2.5
STARTUP_MSG:   args = []
STARTUP_MSG:   version = 2.6.0-cdh5.15.0
STARTUP_MSG:   build = http://github.com/cloudera/hadoop -r e3cb23a1cb2b89d074171b44e71f207c3d6ffa50; compiled by 'jenkins' on 2018-05-24T11:19Z
STARTUP_MSG:   java = 1.8.0_181
Started Hadoop historyserver:                              [  OK  ]

root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/hdfs

root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/yarn

root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -chown yarn:yarn /user/yarn

## Install all CDH components
yum install hive hive-metastore hive-server2 hive-hbase whirr sqoop2-server sqoop2-client sqoop spark-core spark-master spark-worker spark-history-server spark-python hbase-solr-indexer hbase-solr-doc solr-mapreduce solr-crunch solr-server pig mahout hadoop-kms hadoop-kms-server impala*  hue hadoop-httpfs oozie


Hue Configuration
# vi /etc/hadoop/conf.my_cluster/core-site.xml
<!-- Hue WebHDFS proxy user setting -->
        <property>
          <name>hadoop.proxyuser.hue.hosts</name>
          <value>*</value>
        </property>
        <property>
          <name>hadoop.proxyuser.hue.groups</name>
          <value>*</value>
        </property>


Oozie configuration
# vi /etc/oozie/conf/oozie-site.xml
    <property>
        <name>oozie.service.ProxyUserService.proxyuser.hue.hosts</name>
        <value>*</value>
    </property>

    <property>
        <name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
        <value>*</value>
    </property>


Add Server IP Address and Port Number
# vi /etc/hue/conf.empty/hue.ini

## Webserver listens on this address and port 
http_host=192.168.2.5
http_port=8888

Beeswax configuration
[beeswax]

  # Host where HiveServer2 is running.
  # If Kerberos security is enabled, use fully-qualified domain name (FQDN).
  hive_server_host=192.168.2.5

  # Port where HiveServer2 Thrift server runs on.
  hive_server_port=10000

  # Hive configuration directory, where hive-site.xml is located
  hive_conf_dir=/usr/lib/hive/conf/

  # Timeout in seconds for thrift calls to Hive service
  ## server_conn_timeout=120


[impala]
  # Host of the Impala Server (one of the Impalad)
  server_host=192.168.2.5

  # Port of the Impala Server
  server_port=21050

  # Kerberos principal
  ## impala_principal=impala/hostname.foo.com

  # Turn on/off impersonation mechanism when talking to Impala
  impersonation_enabled=False

### Database configuration
[[[mysql]]]
     name=mysqldb
      engine=mysql
      host=localhost
       port=3306
      user=root
       password=temp
   options={}



To connect to the database, install MariaDB on CentOS 7
yum install mariadb* -y 

# mysql -u root -ptemp

MariaDB [(none)]> create database mysqldb;
Query OK, 1 row affected (0.00 sec)

MariaDB [(none)]> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| mysql              |
| mysqldb            |
| performance_schema |
+--------------------+
4 rows in set (0.00 sec)

MariaDB [(none)]> exit
Bye

root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 status
Hive Server2 is not running                                [FAILED]
--date/time-->  08/14/18 18:04:16
root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 restart
Stopped Hive Server2:                                      [  OK  ]
Started Hive Server2 (hive-server2):                       [  OK  ]
--date/time-->  08/14/18 18:04:25
root@hyd-hadoop-test:/usr/lib/hue/pids# systemctl enable hive-server2
hive-server2.service is not a native service, redirecting to /sbin/chkconfig.
Executing /sbin/chkconfig hive-server2 on


2 comments: