The five machines already have their IP addresses and passwordless SSH login configured, so that setup is not repeated here; search online if you have questions. Also, this cluster is built with a non-root user, which is why many commands carry sudo. If you build it as root you can ignore the sudo; if you also use a non-root user, pay special attention that the passwordless login is set up for that non-root user as well.
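For reference, here is a minimal sketch of that passwordless-login setup for the non-root hadoop user, run as hadoop on each node (it assumes the hostname mapping from the next step is already in place):
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
for i in 1 2 3 4 5; do ssh-copy-id hadoop@hadoop$i; done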
Cluster plan
hadoop1: zookeeper, NameNode, ZKFailoverController, JournalNode, ResourceManager
hadoop2: zookeeper, NameNode, ZKFailoverController, JournalNode, ResourceManager
hadoop3: zookeeper, JournalNode, DataNode, NodeManager
hadoop4: zookeeper, JournalNode, DataNode, NodeManager
hadoop5: zookeeper, JournalNode, DataNode, NodeManager
Preparation
Map hostnames to IP addresses
sudo vim /etc/hosts
151.25.88.141 hadoop1
151.25.88.142 hadoop2
151.25.88.143 hadoop3
151.25.88.144 hadoop4
151.25.88.145 hadoop5
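The same entries go into /etc/hosts on all five machines. A quick way to confirm the names resolve from the node you are on:
for i in 1 2 3 4 5; do ping -c 1 hadoop$i; done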
Disable the firewall
sudo systemctl stop firewalld
sudo systemctl disable firewalld
Disable SELinux
sudo vim /etc/selinux/config
SELINUX=enforcing --> SELINUX=disabled
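The config file only takes effect after a reboot; to drop SELinux to permissive right away and confirm the current mode:
sudo setenforce 0
getenforce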
Configure time synchronization (all five nodes)
sudo yum -y install ntp ntpdate
sudo ntpdate ntp.aliyun.com
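ntpdate only performs a one-shot sync. If you want the clocks kept in step afterwards, a small sketch: enable the ntpd service that was just installed and check its peers.
sudo systemctl enable --now ntpd
ntpq -p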
Basic cluster setup
I install all components under /opt/soft/; adjust the path to your own habits. Extract the packages:
sudo mkdir -p /opt/soft
sudo tar -zvxf /home/hadoop/download/jdk-8u251-linux-x64.tar.gz -C /opt/soft
sudo tar -zvxf /home/hadoop/download/apache-zookeeper-3.6.1-bin.tar.gz -C /opt/soft
sudo tar -zvxf /home/hadoop/download/hadoop-3.2.1.tar.gz -C /opt/soft
sudo chown hadoop:hadoop /opt/soft -R
Configure the environment variables
vim ~/.bashrc
export JAVA_HOME=/opt/soft/jdk1.8.0_251
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export ZOOKEEPER_HOME=/opt/soft/apache-zookeeper-3.6.1-bin
export PATH=$ZOOKEEPER_HOME/bin:$PATH
export HADOOP_HOME=/opt/soft/hadoop-3.2.1
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
After saving and exiting, remember to source the file
source ~/.bashrc
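A quick sanity check that the variables took effect:
java -version
hadoop version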
ZooKeeper configuration
cd /opt/soft/apache-zookeeper-3.6.1-bin/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
Change the data directory and set the log directory
dataDir=/home/zookeeper/data
dataLogDir=/home/zookeeper/datalog
Configure the server list
server.1=hadoop1:2888:3888
server.2=hadoop2:2888:3888
server.3=hadoop3:2888:3888
server.4=hadoop4:2888:3888
server.5=hadoop5:2888:3888
After saving and exiting, create the data and log directories so they match zoo.cfg
sudo mkdir -p /home/zookeeper/data
sudo mkdir -p /home/zookeeper/datalog
sudo chown hadoop:hadoop /home/zookeeper -R
Set the ZooKeeper ID (the other four nodes use 2, 3, 4, 5 in turn)
echo 1 > /home/zookeeper/data/myid
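If you would rather not log in to each machine, here is a small sketch that writes the matching myid on every node over ssh (it assumes the data directory above already exists on all five nodes):
for i in 1 2 3 4 5; do ssh hadoop$i "echo $i > /home/zookeeper/data/myid"; done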
Hadoop configuration
Go into the Hadoop configuration directory; six configuration files need to be changed
cd /opt/soft/hadoop-3.2.1/etc/hadoop
hadoop-env.sh
vim hadoop-env.sh
# Set JAVA_HOME
export JAVA_HOME=/opt/soft/jdk1.8.0_251
# Specify which user runs each daemon; required from Hadoop 3.x on (my username is simply hadoop)
export HDFS_NAMENODE_USER=hadoop
export HDFS_DATANODE_USER=hadoop
export HDFS_ZKFC_USER=hadoop
export HDFS_JOURNALNODE_USER=hadoop
export YARN_RESOURCEMANAGER_USER=hadoop
export YARN_NODEMANAGER_USER=hadoop
core-site.xml
<!-- Cluster name used as the default file system -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Temporary directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop_data</value>
</property>
<!-- Static user shown by the web UI -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
<!-- ZooKeeper quorum that the HA setup depends on -->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop1:2181,hadoop2:2181,hadoop3:2181,hadoop4:2181,hadoop5:2181</value>
</property>
hdfs-site.xml
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- IDs of the NameNodes in the HDFS cluster -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<!-- Hostname and RPC port of each NameNode -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop2:8020</value>
</property>
<!-- Hostname and HTTP port of each NameNode -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop1:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop2:9870</value>
</property>
<!-- URL of the shared edits directory (the JournalNode quorum) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop1:8485;hadoop2:8485;hadoop3:8485;hadoop4:8485;hadoop5:8485/mycluster</value>
</property>
<!-- Proxy provider that returns the active NameNode to HDFS clients -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method used when the two NameNodes switch state -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- Private key used by the fencing method -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- Directory where the JournalNodes keep their edits files -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/journalnode/data</value>
</property>
<!-- Switch for automatic NameNode failover in the HA HDFS cluster -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Safe-mode threshold (dfs.safemode.threshold.pct is the deprecated spelling of this key) -->
<property>
<name>dfs.namenode.safemode.threshold-pct</name>
<value>1</value>
</property>
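The directories referenced in these files (hadoop.tmp.dir in core-site.xml and dfs.journalnode.edits.dir here) sit under /home and /opt, which are normally root-owned, so it is safer to create them on every node up front and hand them to the hadoop user, same as the earlier chown:
sudo mkdir -p /home/hadoop_data /opt/journalnode/data
sudo chown hadoop:hadoop /home/hadoop_data /opt/journalnode -R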
workers
hadoop3
hadoop4
hadoop5
yarn-site.xml
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
<description>Enable RM high-availability</description>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster1</value>
<description>Name of the cluster</description>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
<description>The list of RM nodes in the cluster when HA is enabled</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop1</value>
<description>The hostname of the rm1</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop2</value>
<description>The hostname of the rm2</description>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop1:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop2:8088</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop1:2181,hadoop2:2181,hadoop3:2181,hadoop4:2181,hadoop5:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
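Everything above was edited on a single node; the same software and configuration have to be on all five machines before anything is started. A sketch of pushing them out with rsync from hadoop1 (it assumes /opt/soft on the other nodes is already owned by the hadoop user, as in the earlier chown step):
for i in 2 3 4 5; do rsync -a /opt/soft/ hadoop$i:/opt/soft/; rsync -a ~/.bashrc hadoop$i:~/.bashrc; done
Remember to source ~/.bashrc on each node afterwards, and that each node keeps its own myid.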
Starting up
Start ZooKeeper (all five nodes)
zkServer.sh start
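Once all five are up, you can check that the ensemble elected a leader; each node reports either leader or follower:
zkServer.sh status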
Start Hadoop
Start the JournalNodes (all five nodes); on Hadoop 3.x the equivalent hdfs --daemon start journalnode also works
hadoop-daemon.sh start journalnode
Format one NameNode and start it
hdfs namenode -format
hadoop-daemon.sh start namenode
On the other NameNode, sync the metadata manually, then start it
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
Initialize ZKFC (on any one node)
hdfs zkfc -formatZK
Stop all Hadoop processes
stop-dfs.sh
Start everything
start-all.sh
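To confirm the cluster came up healthy, check the processes on each node and the HA state of the NameNodes and ResourceManagers (nn1/nn2 and rm1/rm2 are the IDs defined in the configs above):
jps
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2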
That's it for now. See you in the comments, folks.