Cloudera Distribution Apache Hadoop single Node Installation Step by Step guide Centos 7. Completely manual process.
# yum install java
# java --version
openjdk version "1.8.0_181"
OpenJDK Runtime Environment (build 1.8.0_181-b13)
OpenJDK 64-Bit Server VM (build 25.181-b13, mixed mode)
# useradd -g hadoop hadoop
# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password contains the user name in some form
Retype new password:
passwd: all authentication tokens updated successfully.
# su - hadoop
$ ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:n4d58SIkG9mWM8P6QS8YjxCNxbkhaYKhCNJwFH8a3A8 hadoop@hyd-hyd-hadoop-test.arkit.co.in
The key's randomart image is:
+---[RSA 2048]----+
|++=+ o.. |
|+ooo..+++ |
|o +oE..o |
| + +.+ . |
| . . S @ . |
| . # O o |
| = X = . |
| . * . |
| . |
+----[SHA256]-----+
$ cat ~/.ssh/id_rsa.pub
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkk39QsAbOJwMVy7AenkzYg6nR9YDhVaqwUna9bR2Tu8XsHUYy+x9TiWtrj+3awAb8uuMHqV3Q+aW2Pe3FEwJvWRv0j3tqhYD9UgM4blMAEdHnLhj0bO+AT+y3yhxCcb+7+WEgv1B0pz9HaD8D4NDiYIizUktcetYT3SswS3vZxJrSDr5BsepnHQDMcAgF4xpZwXIbJaoNuQIlO93+L/2OiIqlsjb2BKiBf5EoNTFZt9lJVvCyiV29ujH2eb6zktyIn+D7Z/591Esk1zEcIwIjsjvTJFpb9HXYHJqQhAYC0VrHNRnzRRyGJguzGkFUK8ppTbI7P+W29KBDbpfg7T8z hadoop@hyd-hadoop-test.arkit.co.in
$ cd ~/.ssh/
$ ls
id_rsa id_rsa.pub
$ touch authorized_keys
$ vi authorized_keys
### Paste Above id_rsa.pub content ###
$ chmod 0600 authorized_keys
Downloaded Java fro m oracle site and installed
# yum localinstall jdk-8u172-linux-x64.rpm
Installed:
jdk1.8.x86_64 2000:1.8.0_172-fcs
Complete!
# export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64/
# sudo yum clean all; sudo yum install hadoop-yarn-resourcemanager
# sudo yum clean all; sudo yum install hadoop-hdfs-namenode
# sudo yum clean all; sudo yum install hadoop-hdfs-secondarynamenode
# sudo yum clean all; sudo yum install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce -y
# sudo yum clean all; sudo yum install hadoop-mapreduce-historyserver hadoop-yarn-proxyserver -y
# sudo yum clean all; sudo yum install hadoop-client
root@hyd-hadoop-test:~# cp -r /etc/hadoop/conf.empty/ /etc/hadoop/conf.my_cluster
root@hyd-hadoop-test:~# sudo alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.my_cluster 50
root@hyd-hadoop-test:~# sudo alternatives --set hadoop-conf /etc/hadoop/conf.my_cluster
root@hyd-hadoop-test:~# sudo alternatives --display hadoop-conf
hadoop-conf - status is manual.
link currently points to /etc/hadoop/conf.my_cluster
/etc/hadoop/conf.empty - priority 10
/etc/hadoop/conf.impala - priority 5
/etc/hadoop/conf.my_cluster - priority 50
Current `best' version is /etc/hadoop/conf.my_cluster.
Edit the configuration file and add
# cat /etc/hadoop/conf.my_cluster/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hyd-hadoop-test:8020</value>
<description>NameNode URI</description>
</property>
<property>
<name>fs.trash.interval</name>
<value>30</value>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>31</value>
</property>
</configuration>
# cat /etc/hadoop/conf.my_cluster/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoop-hdfs/cache/hdfs/dfs/name</value>
</property>
<property>
<name>dfs.datanode.name.dir</name>
<value>file:///data/hadoop-hdfs/cache/hdfs/dfs/data</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/name
root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/data
root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/data/
root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/name/
root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/name
root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/data/
format namenode
# sudo -u hdfs hdfs namenode -format
18/08/14 12:53:13 INFO namenode.FSImage: Allocated new BlockPoolId: BP-90611475-10.103.2.104-1534231393025
18/08/14 12:53:13 INFO common.Storage: Storage directory /data/hadoop-hdfs/cache/hdfs/dfs/name has been successfully formatted.
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Saving image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 317 bytes saved in 0 seconds.
18/08/14 12:53:13 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
18/08/14 12:53:13 INFO util.ExitUtil: Exiting with status 0
18/08/14 12:53:13 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hyd-hadoop-test/192.168.2.5
************************************************************/
Start HDFS
root@hyd-hadoop-test:~# for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
starting datanode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-datanode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop datanode (hadoop-hdfs-datanode): [ OK ]
starting namenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop namenode: [ OK ]
starting secondarynamenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-secondarynamenode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop secondarynamenode: [ OK ]
# sudo -u hdfs hadoop fs -mkdir /tmp
# sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
sudo -u hdfs hadoop fs -ls /
root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/mapred-site.xml
<configuration>
<property>
<name>mapreduce.fremework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hyd-hadoop-test.arkit.co.in:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hyd-hadoop-test.arkit.co.in:19888</value>
</property>
</configuration>
root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/yarn-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<description>List of directories to store localized files in.</description>
<name>yarn.nodemanager.local-dirs</name>
<value>file:///var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
</property>
<property>
<description>Where to store container logs.</description>
<name>yarn.nodemanager.log-dirs</name>
<value>file:///var/log/hadoop-yarn/containers</value>
</property>
<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://var/log/hadoop-yarn/apps</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hyd-hadoop-test.arkit.co.in:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hyd-hadoop-test.arkit.co.in:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hyd-hadoop-test.arkit.co.in:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hyd-hadoop-test.arkit.co.in:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.adress</name>
<value>hyd-hadoop-test.arkit.co.in:8088</value>
</property>
<property>
<name>yarn.log.aggregation-enable</name>
<value>true</value>
</property>
</configuration>
Adding Environment Variables for Haddop component
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_COMMON_HOME=/usr/lib/hadoop
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_HDFS_HOME=/usr/lib/hadoop/hadoop-hdfs
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_CONF_DIR=/usr/lib/hadoop/etc/hadoop
root@hyd-hadoop-test:~# mkdir -p /data/yarn/local
--date/time--> 08/14/18 15:02:35
root@hyd-hadoop-test:~# mkdir -p /data/yarn/logs
root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/local
--date/time--> 08/14/18 15:03:11
root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/logs/
root@hyd-hadoop-test:~# sudo service hadoop-yarn-resourcemanager start
starting resourcemanager, logging to /var/log/hadoop-yarn/yarn-yarn-resourcemanager-hyd-hadoop-test.arkit.co.in.out
Started Hadoop resourcemanager: [ OK ]
--date/time--> 08/14/18 15:11:56
root@hyd-hadoop-test:~# sudo service hadoop-yarn-nodemanager start
starting nodemanager, logging to /var/log/hadoop-yarn/yarn-yarn-nodemanager-hyd-hadoop-test.arkit.co.in.out
Started Hadoop nodemanager: [ OK ]
--date/time--> 08/14/18 15:12:10
root@hyd-hadoop-test:~# sudo service hadoop-mapreduce-historyserver start
starting historyserver, logging to /var/log/hadoop-mapreduce/mapred-mapred-historyserver-hyd-hadoop-test.arkit.co.in.out
18/08/14 15:12:17 INFO hs.JobHistoryServer: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting JobHistoryServer
STARTUP_MSG: user = mapred
STARTUP_MSG: host = hyd-hadoop-test/192.168.2.5
STARTUP_MSG: args = []
STARTUP_MSG: version = 2.6.0-cdh5.15.0
STARTUP_MSG: build = http://github.com/cloudera/hadoop -r e3cb23a1cb2b89d074171b44e71f207c3d6ffa50; compiled by 'jenkins' on 2018-05-24T11:19Z
STARTUP_MSG: java = 1.8.0_181
Started Hadoop historyserver: [ OK ]
root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/hdfs
root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/yarn
root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -chown yarn:yarn /user/yarn
## Install all CDH components
yum install hive hive-metastore hive-server2 hive-hbase whirr sqoop2-server sqoop2-client sqoop spark-core spark-master spark-worker spark-history-server spark-python hbase-solr-indexer hbase-solr-doc solr-mapreduce solr-crunch solr-server pig mahout hadoop-kms hadoop-kms-server impala* hue hadoop-httpfs oozie
Hue Configuration
# vi /etc/hadoop/conf.my_cluster/core-site.xml
<!-- Hue WebHDFS proxy user setting -->
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
oozie configuraion
# vi /etc/oozie/conf/oozie-site.xml
<property>
<name>oozie.service.ProxyUserService.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
<value>*</value>
</property>
Add Server IP Address abd Port Number
# vi /etc/hue/conf.empty/hue.ini
## Webserver listens on this address and port
http_host=192.168.2.5
http_port=8888
Beeswax configuration
[beeswax]
# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=192.168.2.5
# Port where HiveServer2 Thrift server runs on.
hive_server_port=10000
# Hive configuration directory, where hive-site.xml is located
hive_conf_dir=/usr/lib/hive/conf/
# Timeout in seconds for thrift calls to Hive service
## server_conn_timeout=120
[impala]
# Host of the Impala Server (one of the Impalad)
server_host=192.168.2.5
# Port of the Impala Server
server_port=21050
# Kerberos principal
## impala_principal=impala/hostname.foo.com
# Turn on/off impersonation mechanism when talking to Impala
impersonation_enabled=False
### Database configuration
[[[mysql]]]
name=mysqldb
engine=mysql
host=localhost
port=3306
user=root
password=temp
options={}
To Connect Database Installing Mariadb in Centos 7
yum install mariadb* -y
# mysql -u root -ptemp
MariaDB [(none)]> create database mysqldb;
Query OK, 1 row affected (0.00 sec)
MariaDB [(none)]> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mysql |
| mysqldb |
| performance_schema |
+--------------------+
4 rows in set (0.00 sec)
MariaDB [(none)]> exit
Bye
root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 status
Hive Server2 is not running [FAILED]
--date/time--> 08/14/18 18:04:16
root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 restart
Stopped Hive Server2: [ OK ]
Started Hive Server2 (hive-server2): [ OK ]
--date/time--> 08/14/18 18:04:25
root@hyd-hadoop-test:/usr/lib/hue/pids# systemctl enable hive-server2
hive-server2.service is not a native service, redirecting to /sbin/chkconfig.
Executing /sbin/chkconfig hive-server2 on
# yum install java
# java --version
openjdk version "1.8.0_181"
OpenJDK Runtime Environment (build 1.8.0_181-b13)
OpenJDK 64-Bit Server VM (build 25.181-b13, mixed mode)
# useradd -g hadoop hadoop
# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password contains the user name in some form
Retype new password:
passwd: all authentication tokens updated successfully.
# su - hadoop
$ ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:n4d58SIkG9mWM8P6QS8YjxCNxbkhaYKhCNJwFH8a3A8 hadoop@hyd-hyd-hadoop-test.arkit.co.in
The key's randomart image is:
+---[RSA 2048]----+
|++=+ o.. |
|+ooo..+++ |
|o +oE..o |
| + +.+ . |
| . . S @ . |
| . # O o |
| = X = . |
| . * . |
| . |
+----[SHA256]-----+
$ cat ~/.ssh/id_rsa.pub
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkk39QsAbOJwMVy7AenkzYg6nR9YDhVaqwUna9bR2Tu8XsHUYy+x9TiWtrj+3awAb8uuMHqV3Q+aW2Pe3FEwJvWRv0j3tqhYD9UgM4blMAEdHnLhj0bO+AT+y3yhxCcb+7+WEgv1B0pz9HaD8D4NDiYIizUktcetYT3SswS3vZxJrSDr5BsepnHQDMcAgF4xpZwXIbJaoNuQIlO93+L/2OiIqlsjb2BKiBf5EoNTFZt9lJVvCyiV29ujH2eb6zktyIn+D7Z/591Esk1zEcIwIjsjvTJFpb9HXYHJqQhAYC0VrHNRnzRRyGJguzGkFUK8ppTbI7P+W29KBDbpfg7T8z hadoop@hyd-hadoop-test.arkit.co.in
$ cd ~/.ssh/
$ ls
id_rsa id_rsa.pub
$ touch authorized_keys
$ vi authorized_keys
### Paste Above id_rsa.pub content ###
$ chmod 0600 authorized_keys
Downloaded Java fro m oracle site and installed
# yum localinstall jdk-8u172-linux-x64.rpm
Installed:
jdk1.8.x86_64 2000:1.8.0_172-fcs
Complete!
# export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64/
# sudo yum clean all; sudo yum install hadoop-yarn-resourcemanager
# sudo yum clean all; sudo yum install hadoop-hdfs-namenode
# sudo yum clean all; sudo yum install hadoop-hdfs-secondarynamenode
# sudo yum clean all; sudo yum install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce -y
# sudo yum clean all; sudo yum install hadoop-mapreduce-historyserver hadoop-yarn-proxyserver -y
# sudo yum clean all; sudo yum install hadoop-client
--Disable Firewall
systemctl stop firewalld/iptables/ip6tables
systemctl disable firewalld/iptables/ip6tables
service stop iptables
service stop ip6tables
chkconfig iptables off
chkconfig ip6tables off
--Disable SELinux
Edit file /etc/selinux/config
SELINUX=disabled
Note: Reboot is required to take effect
root@hyd-hadoop-test:~# cp -r /etc/hadoop/conf.empty/ /etc/hadoop/conf.my_cluster
root@hyd-hadoop-test:~# sudo alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.my_cluster 50
root@hyd-hadoop-test:~# sudo alternatives --set hadoop-conf /etc/hadoop/conf.my_cluster
root@hyd-hadoop-test:~# sudo alternatives --display hadoop-conf
hadoop-conf - status is manual.
link currently points to /etc/hadoop/conf.my_cluster
/etc/hadoop/conf.empty - priority 10
/etc/hadoop/conf.impala - priority 5
/etc/hadoop/conf.my_cluster - priority 50
Current `best' version is /etc/hadoop/conf.my_cluster.
Edit the configuration file and add
# cat /etc/hadoop/conf.my_cluster/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hyd-hadoop-test:8020</value>
<description>NameNode URI</description>
</property>
<property>
<name>fs.trash.interval</name>
<value>30</value>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>31</value>
</property>
</configuration>
# cat /etc/hadoop/conf.my_cluster/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoop-hdfs/cache/hdfs/dfs/name</value>
</property>
<property>
<name>dfs.datanode.name.dir</name>
<value>file:///data/hadoop-hdfs/cache/hdfs/dfs/data</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/name
root@hyd-hadoop-test:~# mkdir -p /data/hadoop-hdfs/cache/hdfs/dfs/data
root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/data/
root@hyd-hadoop-test:~# chown -R hdfs:hdfs /data/hadoop-hdfs/cache/hdfs/dfs/name/
root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/name
root@hyd-hadoop-test:~# chmod 775 /data/hadoop-hdfs/cache/hdfs/dfs/data/
format namenode
# sudo -u hdfs hdfs namenode -format
18/08/14 12:53:13 INFO namenode.FSImage: Allocated new BlockPoolId: BP-90611475-10.103.2.104-1534231393025
18/08/14 12:53:13 INFO common.Storage: Storage directory /data/hadoop-hdfs/cache/hdfs/dfs/name has been successfully formatted.
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Saving image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
18/08/14 12:53:13 INFO namenode.FSImageFormatProtobuf: Image file /data/hadoop-hdfs/cache/hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 317 bytes saved in 0 seconds.
18/08/14 12:53:13 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
18/08/14 12:53:13 INFO util.ExitUtil: Exiting with status 0
18/08/14 12:53:13 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hyd-hadoop-test/192.168.2.5
************************************************************/
Start HDFS
root@hyd-hadoop-test:~# for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
starting datanode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-datanode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop datanode (hadoop-hdfs-datanode): [ OK ]
starting namenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop namenode: [ OK ]
starting secondarynamenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-secondarynamenode-hyd-hyd-hadoop-test.arkit.co.in.out
Started Hadoop secondarynamenode: [ OK ]
# sudo -u hdfs hadoop fs -mkdir /tmp
# sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
sudo -u hdfs hadoop fs -ls /
root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/mapred-site.xml
<configuration>
<property>
<name>mapreduce.fremework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hyd-hadoop-test.arkit.co.in:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hyd-hadoop-test.arkit.co.in:19888</value>
</property>
</configuration>
root@hyd-hadoop-test:~# cat /etc/hadoop/conf.my_cluster/yarn-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<description>List of directories to store localized files in.</description>
<name>yarn.nodemanager.local-dirs</name>
<value>file:///var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
</property>
<property>
<description>Where to store container logs.</description>
<name>yarn.nodemanager.log-dirs</name>
<value>file:///var/log/hadoop-yarn/containers</value>
</property>
<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://var/log/hadoop-yarn/apps</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hyd-hadoop-test.arkit.co.in:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hyd-hadoop-test.arkit.co.in:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hyd-hadoop-test.arkit.co.in:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hyd-hadoop-test.arkit.co.in:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.adress</name>
<value>hyd-hadoop-test.arkit.co.in:8088</value>
</property>
<property>
<name>yarn.log.aggregation-enable</name>
<value>true</value>
</property>
</configuration>
Adding Environment Variables for Haddop component
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_COMMON_HOME=/usr/lib/hadoop
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_HDFS_HOME=/usr/lib/hadoop/hadoop-hdfs
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn
root@hyd-hadoop-test:/usr/lib/hadoop-mapreduce# export HADOOP_CONF_DIR=/usr/lib/hadoop/etc/hadoop
root@hyd-hadoop-test:~# mkdir -p /data/yarn/local
--date/time--> 08/14/18 15:02:35
root@hyd-hadoop-test:~# mkdir -p /data/yarn/logs
root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/local
--date/time--> 08/14/18 15:03:11
root@hyd-hadoop-test:~# chown -R yarn:yarn /data/yarn/logs/
root@hyd-hadoop-test:~# sudo service hadoop-yarn-resourcemanager start
starting resourcemanager, logging to /var/log/hadoop-yarn/yarn-yarn-resourcemanager-hyd-hadoop-test.arkit.co.in.out
Started Hadoop resourcemanager: [ OK ]
--date/time--> 08/14/18 15:11:56
root@hyd-hadoop-test:~# sudo service hadoop-yarn-nodemanager start
starting nodemanager, logging to /var/log/hadoop-yarn/yarn-yarn-nodemanager-hyd-hadoop-test.arkit.co.in.out
Started Hadoop nodemanager: [ OK ]
--date/time--> 08/14/18 15:12:10
root@hyd-hadoop-test:~# sudo service hadoop-mapreduce-historyserver start
starting historyserver, logging to /var/log/hadoop-mapreduce/mapred-mapred-historyserver-hyd-hadoop-test.arkit.co.in.out
18/08/14 15:12:17 INFO hs.JobHistoryServer: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting JobHistoryServer
STARTUP_MSG: user = mapred
STARTUP_MSG: host = hyd-hadoop-test/192.168.2.5
STARTUP_MSG: args = []
STARTUP_MSG: version = 2.6.0-cdh5.15.0
STARTUP_MSG: build = http://github.com/cloudera/hadoop -r e3cb23a1cb2b89d074171b44e71f207c3d6ffa50; compiled by 'jenkins' on 2018-05-24T11:19Z
STARTUP_MSG: java = 1.8.0_181
Started Hadoop historyserver: [ OK ]
root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/hdfs
root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -mkdir -p /user/yarn
root@hyd-hadoop-test:~# sudo -u hdfs hadoop fs -chown yarn:yarn /user/yarn
## Install all CDH components
yum install hive hive-metastore hive-server2 hive-hbase whirr sqoop2-server sqoop2-client sqoop spark-core spark-master spark-worker spark-history-server spark-python hbase-solr-indexer hbase-solr-doc solr-mapreduce solr-crunch solr-server pig mahout hadoop-kms hadoop-kms-server impala* hue hadoop-httpfs oozie
Hue Configuration
# vi /etc/hadoop/conf.my_cluster/core-site.xml
<!-- Hue WebHDFS proxy user setting -->
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
oozie configuraion
# vi /etc/oozie/conf/oozie-site.xml
<property>
<name>oozie.service.ProxyUserService.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
<value>*</value>
</property>
Add Server IP Address abd Port Number
# vi /etc/hue/conf.empty/hue.ini
## Webserver listens on this address and port
http_host=192.168.2.5
http_port=8888
Beeswax configuration
[beeswax]
# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=192.168.2.5
# Port where HiveServer2 Thrift server runs on.
hive_server_port=10000
# Hive configuration directory, where hive-site.xml is located
hive_conf_dir=/usr/lib/hive/conf/
# Timeout in seconds for thrift calls to Hive service
## server_conn_timeout=120
[impala]
# Host of the Impala Server (one of the Impalad)
server_host=192.168.2.5
# Port of the Impala Server
server_port=21050
# Kerberos principal
## impala_principal=impala/hostname.foo.com
# Turn on/off impersonation mechanism when talking to Impala
impersonation_enabled=False
### Database configuration
[[[mysql]]]
name=mysqldb
engine=mysql
host=localhost
port=3306
user=root
password=temp
options={}
To Connect Database Installing Mariadb in Centos 7
yum install mariadb* -y
# mysql -u root -ptemp
MariaDB [(none)]> create database mysqldb;
Query OK, 1 row affected (0.00 sec)
MariaDB [(none)]> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mysql |
| mysqldb |
| performance_schema |
+--------------------+
4 rows in set (0.00 sec)
MariaDB [(none)]> exit
Bye
root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 status
Hive Server2 is not running [FAILED]
--date/time--> 08/14/18 18:04:16
root@hyd-hadoop-test:/usr/lib/hue/pids# /etc/init.d/hive-server2 restart
Stopped Hive Server2: [ OK ]
Started Hive Server2 (hive-server2): [ OK ]
--date/time--> 08/14/18 18:04:25
root@hyd-hadoop-test:/usr/lib/hue/pids# systemctl enable hive-server2
hive-server2.service is not a native service, redirecting to /sbin/chkconfig.
Executing /sbin/chkconfig hive-server2 on
nice
ReplyDelete
ReplyDeleteVery good article . Thanks for sharing.
Azure DevOps Training Online
Azure DevOps Online Training
Azure DevOps Online Training in Hyderabad
Azure DevOps Course Online
Microsoft Azure DevOps Online Training
Azure DevOps Training in Hyderabad
Azure DevOps Training
Azure DevOps Training in Ameerpet