1. Lab environment: 4 virtual machines
- 192.168.131.171 namenode
- 192.168.131.172 node1
- 192.168.131.173 node2
- 192.168.131.174 node3
1.1 Prepare the following on every node:
- Configure NTP time synchronization (a command sketch follows the JDK install below)
- Install the JDK
- Create the data directory, here: mkdir -p /software/hadoopData
- Configure the matching /etc/hosts entries
cat > /etc/hosts<<EOF
192.168.131.171 namenode
192.168.131.172 node1
192.168.131.173 node2
192.168.131.174 node3
EOF
# Install the JDK
curl -o /etc/yum.repos.d/abdas.repo https://repo.luckinserver.cn:90/repo/abdas.repo
yum install -y java.x86_64
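The NTP bullet above has no command; a minimal sketch, assuming the CentOS ntp package is available from the configured repo and that pool.ntp.org (or an internal NTP server of your own) is reachable:
yum install -y ntp
ntpdate pool.ntp.org          # one-off sync; replace with your own NTP server if offline
systemctl enable --now ntpd   # keep the clock in sync afterwards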
1.2 Configure passwordless SSH key login on namenode
# Note that the local machine (namenode itself) also needs key-based login
ssh-keygen -f ~/.ssh/id_rsa -P '' -q
ssh-copy-id -i ~/.ssh/id_rsa.pub root@namenode
ssh-copy-id -i ~/.ssh/id_rsa.pub root@node1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@node2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@node3
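A quick check that key-based login works to every node, using plain ssh:
for h in namenode node1 node2 node3; do ssh -o BatchMode=yes root@$h hostname; done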
1.3 Download the Hadoop binary package on namenode and configure environment variables
# Add to /etc/profile
export PATH=/software/hadoop/bin:/software/hadoop/sbin:$PATH
# Set the JDK path in /software/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/software/java
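The download/extract step itself is not shown above; a minimal sketch, assuming Hadoop 2.x (the 50070/50090 ports used later are 2.x defaults) and using 2.7.7 from the Apache archive as an example version:
cd /software
curl -O https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
tar -xzf hadoop-2.7.7.tar.gz
mv hadoop-2.7.7 hadoop    # so the /software/hadoop paths used below match
source /etc/profile       # reload PATH after editing /etc/profile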
1.4 Edit the configuration files
#/software/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://namenode:9000</value>
<description>HDFS URI, in the form filesystem://namenode-host:port</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/software/hadoopData</value>
<description>Base directory for Hadoop temporary files; other storage directories default under it</description>
</property>
<property>
<name>fs.trash.interval</name>
<value>4320</value>
<description>Trash retention interval in minutes (4320 minutes = 3 days)</description>
</property>
</configuration>
#/software/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/software/hadoopData/dfs/name</value>
<description>Where the namenode stores HDFS namespace metadata</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/software/hadoopData/dfs/data</value>
<description>Physical location on the datanode where data blocks are stored</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Replication factor; the default is 3 and it should not exceed the number of datanodes</description>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>staff</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node1:50090</value>
</property>
</configuration>
1.5 Configure the DataNode list
#/software/hadoop/etc/hadoop/slaves
# Here namenode is also added as a datanode (one way to write the file is shown after the host list)
namenode
node1
node2
node3
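One way to write this file (etc/hadoop/slaves is the Hadoop 2.x name; Hadoop 3.x renamed it to workers):
cat > /software/hadoop/etc/hadoop/slaves <<EOF
namenode
node1
node2
node3
EOF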
1.6 Distribute the Hadoop package
scp -r /software/hadoop node1:/software
scp -r /software/hadoop node2:/software
scp -r /software/hadoop node3:/software
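The copied hadoop-env.sh already carries JAVA_HOME, which is all the daemons need; if you also want the PATH export from 1.3 on the other nodes for running hdfs commands by hand there, copy /etc/profile over as well (optional):
for h in node1 node2 node3; do scp /etc/profile $h:/etc/profile; done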
1.7 Initialize and start
# Run on namenode
hdfs namenode -format
start-dfs.sh
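A quick sanity check after start-dfs.sh, using standard JDK/Hadoop commands:
jps                     # expect NameNode and DataNode here, SecondaryNameNode on node1
hdfs dfsadmin -report   # should list 4 live datanodes
# the NameNode web UI is at http://namenode:50070 (Hadoop 2.x default port)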
2. HDFS HA deployment
Required components: namenode, datanode, zookeeper, zkfc, journalnode.
This lab uses 6 virtual machines. zkfc must be installed on the namenodes; the number of journalnodes and zookeeper servers must be odd, and at least 3 journalnodes are required.
- namenode1 192.168.131.171
- namenode2 192.168.131.172
- datanode1 192.168.131.173
- datanode2 192.168.131.174
- datanode3 192.168.131.175
- datanode4 192.168.131.176
|           | namenode | datanode | journalnode | zookeeper | zkfc |
| --------- | -------- | -------- | ----------- | --------- | ---- |
| namenode1 | √        | √        |             |           | √    |
| namenode2 | √        | √        | √           | √         | √    |
| datanode1 |          | √        | √           | √         |      |
| datanode2 |          | √        | √           | √         |      |
| datanode3 |          | √        | √           | √         |      |
| datanode4 |          | √        | √           | √         |      |
2.1 Environment preparation
- Configure NTP time synchronization
- Install the JDK
- Create the data directory, here: mkdir -p /software/hadoopData
- Configure the matching /etc/hosts entries
- yum -y install psmisc   # install on both namenodes; sshfence relies on fuser from psmisc, without it automatic failover will not work
cat > /etc/hosts<<EOF
192.168.131.171 namenode1
192.168.131.172 namenode2
192.168.131.173 datanode1
192.168.131.174 datanode2
192.168.131.175 datanode3
192.168.131.176 datanode4
EOF
# Install the JDK
curl -o /etc/yum.repos.d/abdas.repo https://repo.luckinserver.cn:90/repo/abdas.repo
yum install -y java.x86_64
2.2 Configure passwordless SSH key login on the namenodes
# Note that the local machine also needs key-based login; for sshfence to work in both directions, repeat this key setup on namenode2 as well
ssh-keygen -f ~/.ssh/id_rsa -P '' -q
ssh-copy-id -i ~/.ssh/id_rsa.pub root@namenode1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@namenode2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode3
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode4
2.3 Deploy ZooKeeper
# After downloading and extracting the binary package, edit the config file
cp /software/zookeeper/conf/zoo_sample.cfg /software/zookeeper/conf/zoo.cfg
cat >> /software/zookeeper/conf/zoo.cfg<<EOF
server.1=namenode2:2888:3888
server.2=datanode1:2888:3888
server.3=datanode2:2888:3888
server.4=datanode3:2888:3888
server.5=datanode4:2888:3888
EOF
mkdir -p /tmp/zookeeper
Create a myid file under /tmp/zookeeper on each ZooKeeper node, containing that node's server number from zoo.cfg (1, 2, 3, 4, 5)
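For example, on namenode2, which is server.1 in zoo.cfg (use 2-5 on datanode1-datanode4 accordingly):
echo 1 > /tmp/zookeeper/myid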
Add the Java environment variable in /software/zookeeper/bin/zkEnv.sh
export JAVA_HOME=/software/java
# Create a systemd unit so ZooKeeper can be managed with systemctl
cat > /usr/lib/systemd/system/zookeeper.service <<EOF
[Unit]
Description=zookeeper.service
After=network.target
[Service]
Type=forking
ExecStart=/software/zookeeper/bin/zkServer.sh start
ExecStop=/software/zookeeper/bin/zkServer.sh stop
[Install]
WantedBy=multi-user.target
EOF
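Reload systemd and start ZooKeeper on every ZooKeeper node, then check the ensemble:
systemctl daemon-reload
systemctl enable --now zookeeper
/software/zookeeper/bin/zkServer.sh status   # one node should report leader, the rest follower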
2.4 Deploy the Hadoop binary package
# Edit hdfs-site.xml
vim /software/hadoop/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.namenode.name.dir</name>
<value>/software/hadoopData/dfs/name</value>
<description>Where the namenode stores HDFS namespace metadata</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/software/hadoopData/dfs/data</value>
<description>Physical location on the datanode where data blocks are stored</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Replication factor; the default is 3 and it should not exceed the number of datanodes</description>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>staff</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>namenode1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>namenode2:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>namenode1:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>namenode2:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://namenode2:8485;datanode1:8485;datanode2:8485;datanode3:8485;datanode4:8485/mycluster</value>
<description>Quorum journal URI listing every journalnode host</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/software/hadoop/ha/jn</value>
<description>Local directory where each journalnode stores its edit logs</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
---------------------------------------------------------------------------------------------------------------------------------------
# Edit core-site.xml
vim /software/hadoop/etc/hadoop/core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
<description>Default filesystem URI; points at the HA nameservice instead of a single namenode</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>namenode2:2181,datanode1:2181,datanode2:2181,datanode3:2181,datanode4:2181</value>
<description>ZooKeeper quorum used for automatic failover</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/software/hadoopData</value>
<description>Base directory for Hadoop temporary files; other storage directories default under it</description>
</property>
- On every journalnode host, run /software/hadoop/sbin/hadoop-daemon.sh start journalnode
- On namenode1, run hdfs namenode -format
- On namenode1, run hadoop-daemon.sh start namenode
- On namenode2, run hdfs namenode -bootstrapStandby
- On one of the namenodes, run hdfs zkfc -formatZK (it only needs to be run once; it creates the HA znode in ZooKeeper)
- On namenode1, run stop-dfs.sh && start-dfs.sh
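Once everything is back up, the HA state can be checked with the standard hdfs tools:
hdfs haadmin -getServiceState nn1   # one of nn1/nn2 should be active, the other standby
hdfs haadmin -getServiceState nn2
jps                                 # each namenode should show NameNode and DFSZKFailoverController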
3. Enable mounting HDFS over NFS
# Add to /software/hadoop/etc/hadoop/core-site.xml
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
# Start the NFS gateway
hadoop-daemon.sh start portmap
hadoop-daemon.sh start nfs3
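Before mounting, verify that the gateway registered its services, using standard NFS client tools:
rpcinfo -p 192.168.131.171      # should list portmapper, mountd and nfs
showmount -e 192.168.131.171    # should export /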
# Mount on a client (create the mount point first)
mkdir -p /hdfsmount
mount -t nfs -o vers=3,proto=tcp,nolock,noacl,sync 192.168.131.171:/ /hdfsmount