
HDFS Deployment and Usage

admin · January 4, 2021

1. Lab environment: 4 virtual machines

  1. 192.168.131.171 namenode
  2. 192.168.131.172 node1
  3. 192.168.131.173 node2
  4. 192.168.131.174 node3

1.1 Prepare each node:

  1. Configure NTP time synchronization (a minimal sketch follows after the commands below)
  2. Install the JDK
  3. Create the data directory, here: mkdir -p /software/hadoopData
  4. Configure the corresponding hosts entries
cat > /etc/hosts<<EOF
192.168.131.171 namenode
192.168.131.172 node1
192.168.131.173 node2
192.168.131.174 node3
EOF

#Install the JDK
curl -o /etc/yum.repos.d/abdas.repo  https://repo.luckinserver.cn:90/repo/abdas.repo
yum install -y java.x86_64 
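
Item 1 above only names NTP synchronization; a minimal sketch using chrony is shown here, assuming the stock chrony package and reachable upstream time servers (in an isolated network, point every node at a single internal NTP server in /etc/chrony.conf instead):

#NTP sync via chrony (sketch; adjust the time sources to your environment)
yum install -y chrony
systemctl enable chronyd
systemctl start chronyd
chronyc sources        #confirm all nodes track the same time source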

1.2 Configure SSH key-based login on the namenode

#Note: the local machine itself also needs key-based login

ssh-keygen -f ~/.ssh/id_rsa -P '' -q
ssh-copy-id -i ~/.ssh/id_rsa.pub root@namenode
ssh-copy-id -i ~/.ssh/id_rsa.pub root@node1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@node2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@node3
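
A quick way to confirm that passwordless login works from the namenode to every node, including itself:

for h in namenode node1 node2 node3; do
  ssh -o BatchMode=yes root@$h hostname   #should print each hostname without a password prompt
done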

1.3 Download the Hadoop binary package on the namenode and configure environment variables

#Configure in /etc/profile
export PATH=/software/hadoop/bin:/software/hadoop/sbin:$PATH

#Configure the JDK path in /software/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/software/java
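
The download and extraction step itself is not shown above; it might look like the sketch below. Hadoop 2.7.7 and the Apache archive URL are assumptions here, so use whichever release you actually deploy:

cd /software
curl -O https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz   #version is an assumption
tar -xzf hadoop-2.7.7.tar.gz
mv hadoop-2.7.7 hadoop        #matches the /software/hadoop paths used throughout
source /etc/profile           #pick up the new PATH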

1.4 Modify the configuration files

#/software/hadoop/etc/hadoop/core-site.xml

<configuration>
 <property>
  <name>fs.defaultFS</name>
  <value>hdfs://namenode:9000</value>
  <description>The HDFS URI: filesystem://namenode-hostname:port</description>
 </property>

 <property>
  <name>hadoop.tmp.dir</name>
  <value>/software/hadoopData</value>
  <description>Base directory for Hadoop temporary files</description>
 </property>

 <property>
   <name>fs.trash.interval</name>
   <value>4320</value>
 </property>
</configuration>

#/software/hadoop/etc/hadoop/hdfs-site.xml

<configuration>
<property>
   <name>dfs.namenode.name.dir</name>
   <value>/software/hadoopData/dfs/name</value>
   <description>Where the namenode stores the HDFS namespace metadata</description>
 </property>
 
 <property>
   <name>dfs.datanode.data.dir</name>
   <value>/software/hadoopData/dfs/data</value>
   <description>Physical storage location of data blocks on the datanode</description>
 </property>
 
 <property>
   <name>dfs.replication</name>
   <value>3</value>
   <description>Replication factor; the default is 3 and it should not exceed the number of datanodes</description>
 </property>
 
 <property>
   <name>dfs.webhdfs.enabled</name>
   <value>true</value>
 </property>
 
 <property>
   <name>dfs.permissions.superusergroup</name>
   <value>staff</value>
 </property>
 
 <property>
   <name>dfs.permissions.enabled</name>
   <value>false</value>
 </property>

 <property>
   <name>dfs.namenode.secondary.http-address</name>
   <value>node1:50090</value>
 </property>

</configuration>
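
After editing, the effective values can be sanity-checked with hdfs getconf, for example:

hdfs getconf -confKey fs.defaultFS      #expect hdfs://namenode:9000
hdfs getconf -confKey dfs.replication   #expect 3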

1.5 Configure the DataNode nodes

#/software/hadoop/etc/hadoop/slaves
#Here the namenode is also included as a datanode
namenode
node1
node2
node3

1.6 Distribute the Hadoop package

scp -r /software/hadoop node1:/software
scp -r /software/hadoop node2:/software
scp -r /software/hadoop node3:/software

1.7 Initialization

#Run on the namenode

hdfs namenode -format
start-dfs.sh
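
Once start-dfs.sh finishes, the cluster can be checked with a datanode report and a simple read/write test (the /test path is just an example):

hdfs dfsadmin -report            #all 4 datanodes should be listed as live
hdfs dfs -mkdir /test
hdfs dfs -put /etc/hosts /test/
hdfs dfs -cat /test/hosts

The NameNode web UI is also reachable on port 50070 (the Hadoop 2.x default).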

2. HDFS HA deployment

Required components: namenode, datanode, zookeeper, zkfc, and journalnode.

The lab uses 6 virtual machines in total. zkfc must run alongside each namenode; the number of journalnode and zookeeper instances must be odd, and at least 3 journalnodes are required. The role assignment is shown in the table below.

  1. namenode1 192.168.131.171
  2. namenode2 192.168.131.172
  3. datanode1 192.168.131.173
  4. datanode2 192.168.131.174
  5. datanode3 192.168.131.175
  6. datanode4 192.168.131.176
host        namenode  datanode  journalnode  zookeeper  zkfc
namenode1   yes       -         -            -          yes
namenode2   yes       -         yes          yes        yes
datanode1   -         yes       yes          yes        -
datanode2   -         yes       yes          yes        -
datanode3   -         yes       yes          yes        -
datanode4   -         yes       yes          yes        -

2.1 Environment preparation

  1. Configure NTP time synchronization
  2. Install the JDK
  3. Create the data directory, here: mkdir -p /software/hadoopData
  4. Configure the corresponding hosts entries
  5. yum -y install psmisc #install on both namenodes, otherwise automatic failover will not work (sshfence relies on fuser)
cat > /etc/hosts<<EOF
192.168.131.171  namenode1
192.168.131.172  namenode2
192.168.131.173  datanode1
192.168.131.174  datanode2
192.168.131.175  datanode3
192.168.131.176  datanode4
EOF

#Install the JDK
curl -o /etc/yum.repos.d/abdas.repo  https://repo.luckinserver.cn:90/repo/abdas.repo
yum install -y java.x86_64 

2.2 Configure SSH key-based login on the namenodes

#Note: the local machine itself also needs key-based login

ssh-keygen -f ~/.ssh/id_rsa -P '' -q
ssh-copy-id -i ~/.ssh/id_rsa.pub root@namenode1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@namenode2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode2
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode3
ssh-copy-id -i ~/.ssh/id_rsa.pub root@datanode4

2.3 Deploy ZooKeeper

#After downloading and extracting the binary package, modify the configuration file
cp /software/zookeeper/conf/zoo_sample.cfg  /software/zookeeper/conf/zoo.cfg

cat >> /software/zookeeper/conf/zoo.cfg<<EOF
server.1=namenode2:2888:3888
server.2=datanode1:2888:3888
server.3=datanode2:2888:3888
server.4=datanode3:2888:3888
server.5=datanode4:2888:3888
EOF

mkdir -p /tmp/zookeeper

Create a myid file under /tmp/zookeeper on each ZooKeeper node, matching its server number (1, 2, 3, 4, 5)
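
The myid value on each host must match its server.N line in zoo.cfg; for example (the sample config's dataDir defaults to /tmp/zookeeper, which is why the file lives there):

echo 1 > /tmp/zookeeper/myid   #on namenode2  (server.1)
echo 2 > /tmp/zookeeper/myid   #on datanode1  (server.2)
echo 3 > /tmp/zookeeper/myid   #on datanode2  (server.3)
echo 4 > /tmp/zookeeper/myid   #on datanode3  (server.4)
echo 5 > /tmp/zookeeper/myid   #on datanode4  (server.5)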

Add the Java environment variable in /software/zookeeper/bin/zkEnv.sh
export JAVA_HOME=/software/java

#Create a systemd unit so ZooKeeper can be started via systemctl
cat > /usr/lib/systemd/system/zookeeper.service  <<EOF
[Unit]
Description=zookeeper.service
After=network.target
[Service]
Type=forking
ExecStart=/software/zookeeper/bin/zkServer.sh start
ExecStop=/software/zookeeper/bin/zkServer.sh stop
[Install]
WantedBy=multi-user.target
EOF
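
With the unit file in place, ZooKeeper can be started on each of the five ZooKeeper nodes and its role checked:

systemctl daemon-reload
systemctl enable zookeeper
systemctl start zookeeper
/software/zookeeper/bin/zkServer.sh status   #one node should report "leader", the others "follower"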

2.4 Deploy the Hadoop binary package

#Modify hdfs-site.xml
vim /software/hadoop/etc/hadoop/hdfs-site.xml

<property>
   <name>dfs.namenode.name.dir</name>
   <value>/software/hadoopData/dfs/name</value>
   <description>Where the namenode stores the HDFS namespace metadata</description>
 </property>
 
 <property>
   <name>dfs.datanode.data.dir</name>
   <value>/software/hadoopData/dfs/data</value>
   <description>Physical storage location of data blocks on the datanode</description>
 </property>
 
 <property>
   <name>dfs.replication</name>
   <value>3</value>
   <description>Replication factor; the default is 3 and it should not exceed the number of datanodes</description>
 </property>
 
 <property>
   <name>dfs.webhdfs.enabled</name>
   <value>true</value>
 </property>
 
 <property>
   <name>dfs.permissions.superusergroup</name>
   <value>staff</value>
 </property>
 
 <property>
   <name>dfs.permissions.enabled</name>
   <value>false</value>
 </property>



<property>
  <name>dfs.nameservices</name>
  <value>mycluster</value>
</property>
<property>
  <name>dfs.ha.namenodes.mycluster</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.mycluster.nn1</name>
  <value>namenode1:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.mycluster.nn2</name>
  <value>namenode2:8020</value>
</property>
<property>
  <name>dfs.namenode.http-address.mycluster.nn1</name>
  <value>namenode1:50070</value>
</property>
<property>
  <name>dfs.namenode.http-address.mycluster.nn2</name>
  <value>namenode2:50070</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://namenode2:8485;datanode1:8485;datanode2:8485;datanode3:8485;datanode4:8485/mycluster</value>
  <description>JournalNode quorum used for the shared edits directory</description>
</property>
<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/software/hadoop/ha/jn</value>
  <description>Local path where the JournalNode stores the edit logs</description>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.mycluster</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>sshfence</value>
</property>
<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/root/.ssh/id_rsa</value>
</property>
<property>
   <name>dfs.ha.automatic-failover.enabled</name>
   <value>true</value>
 </property>

---------------------------------------------------------------------------------------------------------------------------------------

#Modify core-site.xml

vim /software/hadoop/etc/hadoop/core-site.xml 

<property>
  <name>fs.defaultFS</name>
  <value>hdfs://mycluster</value>  
  <description>The HDFS nameservice used as the default filesystem</description>
</property>
<property>
  <name>ha.zookeeper.quorum</name>
  <value>namenode2:2181,datanode1:2181,datanode2:2181,datanode3:2181,datanode4:2181</value>
  <description>ZooKeeper quorum used for automatic failover</description>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/software/hadoopData</value>
  <description>Base directory for Hadoop temporary files</description>
</property>

After the configuration files are in place, initialize in the following order:

  1. On each JournalNode host, run /software/hadoop/sbin/hadoop-daemon.sh start journalnode
  2. On namenode1, run hdfs namenode -format
  3. On namenode1, run hadoop-daemon.sh start namenode
  4. On namenode2, run hdfs namenode -bootstrapStandby
  5. On namenode1, run hdfs zkfc -formatZK (this only needs to be run once; it creates the HA znode in ZooKeeper)
  6. On namenode1, run stop-dfs.sh && start-dfs.sh
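
After the final restart, HA status can be verified from either namenode; nn1 and nn2 are the IDs defined in dfs.ha.namenodes.mycluster:

hdfs haadmin -getServiceState nn1   #one namenode should report "active"
hdfs haadmin -getServiceState nn2   #the other should report "standby"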

3. Add support for mounting HDFS via NFS

#Add to /software/hadoop/etc/hadoop/core-site.xml

<property>
  <name>hadoop.proxyuser.root.groups</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.root.hosts</name>
  <value>*</value>
</property>

#Start the NFS gateway

hadoop-daemon.sh start portmap
hadoop-daemon.sh start nfs3

#Mount
mount -t nfs -o vers=3,proto=tcp,nolock,noacl,sync 192.168.131.171:/ /hdfsmount
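
The mount point has to exist before mounting; afterwards the HDFS root shows up as an ordinary filesystem:

mkdir -p /hdfsmount      #create the mount point first
df -h /hdfsmount         #shows HDFS capacity via the NFS gateway
ls /hdfsmount            #lists the HDFS root directory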