---
title: "Hadoop 3.3 Cluster Deployment"
date: 2023-07-28T14:00:00+08:00
lastmod: 2026-02-03T11:00:00+08:00
keywords: []
tags: ["hadoop", "hive", "tez", "hbase", "spark"]
categories: ["hadoop"]
---

## Environment

Hostname | Address | Data directory | Components
---- | ---- | ---- | ----
hdp-nn | 192.168.8.1/24 | /data/hdp_nn | NameNode
hdp-snn | 192.168.8.2/24 | /data/hdp_snn | SecondaryNameNode
hdp-rm | 192.168.8.3/24 | - | ResourceManager
hdp-worker0 | 192.168.8.10/24 | /data/hdp_dn | DataNode NodeManager
hdp-worker1 | 192.168.8.11/24 | /data/hdp_dn | DataNode NodeManager
hive-hs | 192.168.8.20/24 | - | HiveServer2 Tez
hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez

## Deploy the Hadoop Cluster

### Initial Server Configuration

- Perform the following on **all hosts** (the first three items are sketched in the code block after this list)
    - Disable the firewall
    - Disable SELinux
    - Configure time synchronization
    - Configure hostname resolution: edit /etc/hosts and append the following
        ```bash
        # hadoop
        192.168.8.1 hdp-nn
        192.168.8.2 hdp-snn
        192.168.8.3 hdp-rm
        192.168.8.10 hdp-worker0
        192.168.8.11 hdp-worker1
        ```
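
- A minimal sketch of the first three items, assuming systemd hosts with firewalld, SELinux, and chrony (adjust for your distribution)
    ```bash
    # Stop the firewall now and on future boots
    systemctl disable --now firewalld
    # Stop SELinux enforcement immediately, and disable it across reboots
    setenforce 0
    sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
    # Keep clocks in sync via chrony
    systemctl enable --now chronyd
    ```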

### Passwordless SSH Login

- On **hdp-nn**, configure passwordless SSH login to hdp-nn, hdp-snn, and hdp-workerX
    ```bash
    ssh-copy-id hdp-nn
    ssh-copy-id hdp-snn
    ssh-copy-id hdp-worker0
    ssh-copy-id hdp-worker1
    ```

- On **hdp-rm**, configure passwordless SSH login to hdp-rm and hdp-workerX
    ```bash
    ssh-copy-id hdp-rm
    ssh-copy-id hdp-worker0
    ssh-copy-id hdp-worker1
    ```
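
- ssh-copy-id assumes the local user already has a key pair; if not, generate one first (a sketch, assuming a passphrase-less RSA key so the logins stay unattended)
    ```bash
    # Create ~/.ssh/id_rsa only if it does not exist yet
    [ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -b 4096 -N '' -f ~/.ssh/id_rsa
    ```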

### Deploy the JDK 8 Environment

- On **all hosts**, download the **latest JDK 8 package** and unpack it
    ```bash
    tar zxf jdk-8u471-linux-x64.tar.gz
    mv jdk1.8.0_471 /opt/jdk
    # No JDK environment variables are needed; JAVA_HOME is set in hadoop-env.sh below
    ```
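
- Confirm the unpacked JDK runs
    ```bash
    /opt/jdk/bin/java -version
    ```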

### Deploy the DFS and YARN Clusters

- Perform the following on **all hosts**
    - Download the hadoop 3.3.6 distribution and unpack it
        ```bash
        curl -LO https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
        tar zxf hadoop-3.3.6.tar.gz
        mv hadoop-3.3.6 /opt/hdp
        ```

    - Configure the hadoop environment variables
        ```bash
        echo 'export HADOOP_HOME=/opt/hdp' > /etc/profile.d/hdp.sh
        echo 'export PATH=$HADOOP_HOME/bin:$PATH' >> /etc/profile.d/hdp.sh
        # Adding $HADOOP_HOME/sbin to PATH is not recommended, to avoid conflicts with spark
        source /etc/profile.d/hdp.sh
        ```
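
    - To verify, `hadoop version` should now report 3.3.6
        ```bash
        hadoop version
        ```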

    - Edit $HADOOP_HOME/etc/hadoop/hadoop-env.sh to set JAVA_HOME and the users that run each component
        ```bash
        export JAVA_HOME=/opt/jdk
        export HDFS_NAMENODE_USER=root
        export HDFS_DATANODE_USER=root
        export HDFS_SECONDARYNAMENODE_USER=root
        export YARN_RESOURCEMANAGER_USER=root
        export YARN_NODEMANAGER_USER=root
        ```

    - Edit $HADOOP_HOME/etc/hadoop/core-site.xml; reference content below
        ```xml
        <configuration>
            <property>
                <!-- hdfs protocol address of the namenode -->
                <name>fs.defaultFS</name>
                <value>hdfs://hdp-nn:8020</value>
            </property>
            <property>
                <!-- directory where the cluster keeps temporary files; on datanodes a dedicated disk is recommended -->
                <name>hadoop.tmp.dir</name>
                <value>/tmp/hdp</value>
            </property>
            <property>
                <!-- allow hive beeline to log in as user root -->
                <name>hadoop.proxyuser.root.hosts</name>
                <value>*</value>
            </property>
            <property>
                <!-- allow hive beeline to log in as user root -->
                <name>hadoop.proxyuser.root.groups</name>
                <value>*</value>
            </property>
        </configuration>
        ```

    - Edit $HADOOP_HOME/etc/hadoop/hdfs-site.xml; reference content below
        ```xml
        <configuration>
            <property>
                <!-- namenode metadata location; several comma-separated directories may be listed for redundancy -->
                <name>dfs.namenode.name.dir</name>
                <value>/data/hdp_nn</value>
            </property>
            <property>
                <!-- secondary namenode checkpoint location; several comma-separated directories may be listed for redundancy -->
                <name>dfs.namenode.checkpoint.dir</name>
                <value>/data/hdp_snn</value>
            </property>
            <property>
                <!-- datanode block storage; several comma-separated directories (one per disk) improve read/write io -->
                <name>dfs.datanode.data.dir</name>
                <value>/data/hdp_dn</value>
            </property>
            <property>
                <!-- web ui address of the namenode -->
                <name>dfs.namenode.http-address</name>
                <value>hdp-nn:9870</value>
            </property>
            <property>
                <!-- host and port of the secondary namenode -->
                <name>dfs.namenode.secondary.http-address</name>
                <value>hdp-snn:9868</value>
            </property>
            <property>
                <!-- hdfs replica count; default 3, set to 2 here so data stays redundant with two datanodes -->
                <name>dfs.replication</name>
                <value>2</value>
            </property>
            <property>
                <!-- enable the webhdfs api -->
                <name>dfs.webhdfs.enabled</name>
                <value>true</value>
            </property>
        </configuration>
        ```

    - Edit $HADOOP_HOME/etc/hadoop/yarn-site.xml; reference content below
        ```xml
        <configuration>
            <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
            </property>
            <property>
                <!-- hostname of the resourcemanager -->
                <name>yarn.resourcemanager.hostname</name>
                <value>hdp-rm</value>
            </property>
            <property>
                <!-- web ui address of the resourcemanager (default port 8088) -->
                <name>yarn.resourcemanager.webapp.address</name>
                <value>hdp-rm:8088</value>
            </property>
            <!-- give each nodemanager 6 usable vcores
            <property>
                <name>yarn.nodemanager.resource.cpu-vcores</name>
                <value>6</value>
            </property> -->
            <!-- give each nodemanager 12GB of usable memory
            <property>
                <name>yarn.nodemanager.resource.memory-mb</name>
                <value>12288</value>
            </property> -->
            <property>
                <!-- (optional) enable log aggregation so logs of finished jobs can be viewed in the web ui -->
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
            </property>
            <property>
                <!-- (optional) log retention time (7 days) -->
                <name>yarn.log-aggregation.retain-seconds</name>
                <value>604800</value>
            </property>
            <property>
                <!-- the yarn virtual memory check must be disabled when using spark/tez -->
                <name>yarn.nodemanager.vmem-check-enabled</name>
                <value>false</value>
            </property>
            <property>
                <!-- the yarn physical memory check must be disabled when using spark -->
                <name>yarn.nodemanager.pmem-check-enabled</name>
                <value>false</value>
            </property>
        </configuration>
        ```

    - $HADOOP_HOME/etc/hadoop/mapred-env.sh needs no changes
    - Edit $HADOOP_HOME/etc/hadoop/mapred-site.xml; reference content below
        ```xml
        <configuration>
            <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
            </property>
            <property>
                <!-- MapReduce JobHistory Server address -->
                <name>mapreduce.jobhistory.address</name>
                <value>hdp-rm:10020</value>
            </property>
            <property>
                <!-- MapReduce JobHistory Server web ui address (default port 19888) -->
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>hdp-rm:19888</value>
            </property>
            <property>
                <name>yarn.app.mapreduce.am.env</name>
                <value>HADOOP_MAPRED_HOME=/opt/hdp</value>
            </property>
            <property>
                <name>mapreduce.map.env</name>
                <value>HADOOP_MAPRED_HOME=/opt/hdp</value>
            </property>
            <property>
                <name>mapreduce.reduce.env</name>
                <value>HADOOP_MAPRED_HOME=/opt/hdp</value>
            </property>
        </configuration>
        ```

    - Edit $HADOOP_HOME/etc/hadoop/workers, replacing its contents with all the worker hosts; reference content below
        ```
        hdp-worker0
        hdp-worker1
        ```
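
    - Rather than repeating the edits on every host, one option is to finish the configuration on hdp-nn and push it out; a sketch (rsync over ssh, so hdp-rm will prompt for a password unless its key is also set up)
        ```bash
        # Push the finished config directory to the other hadoop hosts
        for h in hdp-snn hdp-rm hdp-worker0 hdp-worker1; do
            rsync -a /opt/hdp/etc/hadoop/ $h:/opt/hdp/etc/hadoop/
        done
        ```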

### Format the NameNode

- On **hdp-nn**, run
    ```bash
    hdfs namenode -format
    ```

## Start the Hadoop Cluster

- On **hdp-nn**, start the dfs cluster
    ```bash
    /opt/hdp/sbin/start-dfs.sh
    ```
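
- Once hdfs is up, both datanodes should register; a quick check
    ```bash
    hdfs dfsadmin -report
    ```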

- On **hdp-rm**, start the yarn cluster
    ```bash
    /opt/hdp/sbin/start-yarn.sh
    ```

- On each **hdp-X** host, check the java processes
    ```bash
    /opt/jdk/bin/jps
    ```
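
- Per the component table above, expect NameNode on hdp-nn, SecondaryNameNode on hdp-snn, ResourceManager on hdp-rm, and DataNode plus NodeManager on each hdp-workerX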

---

## Deploy the Hive Cluster

### Prerequisites

- [mysql 8 is already deployed](/post/mysql-install/#安装-mysql84-通用二进制包)
- The mysql user and its database have been created; reference sql below
    ```sql
    create user hive@'%' identified by 'Hive_1234';
    create database hive default charset utf8mb4;
    grant all on hive.* to hive@'%';
    ```

### Initial Server Configuration

- On the **hive-X** hosts, configure hostname resolution: edit /etc/hosts and append the following
    ```
    # note: the hadoop entries added earlier must not be removed
    # hive
    192.168.8.20 hive-hs
    192.168.8.21 hive-ms
    ```

### Deploy the Tez Environment

- On the **hive-X** hosts, download the tez 0.10.4 package and unpack it
    ```bash
    curl -LO https://mirrors.tuna.tsinghua.edu.cn/apache/tez/0.10.4/apache-tez-0.10.4-bin.tar.gz
    tar zxf apache-tez-0.10.4-bin.tar.gz
    mv apache-tez-0.10.4-bin /opt/tez
    ```

- On **hive-hs**, put tez.tar.gz into hdfs
    ```bash
    hdfs dfs -mkdir /tez
    hdfs dfs -put /opt/tez/share/tez.tar.gz /tez/
    ```
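
- Confirm the upload landed
    ```bash
    hdfs dfs -ls /tez
    ```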

- On **all hosts**, disable the yarn virtual memory check in $HADOOP_HOME/etc/hadoop/yarn-site.xml (already disabled above)
- On **all hosts**, create $HADOOP_HOME/etc/hadoop/tez-site.xml; reference content below
    ```xml
    <configuration>
        <property>
            <name>tez.lib.uris</name>
            <value>${fs.defaultFS}/tez/tez.tar.gz</value>
        </property>
    </configuration>
    ```

- Restart the hadoop dfs and yarn clusters
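
- For example, with the scripts bundled with hadoop (the stop scripts mirror the start scripts used earlier)
    ```bash
    # On hdp-nn: restart the dfs cluster
    /opt/hdp/sbin/stop-dfs.sh
    /opt/hdp/sbin/start-dfs.sh
    # On hdp-rm: restart the yarn cluster
    /opt/hdp/sbin/stop-yarn.sh
    /opt/hdp/sbin/start-yarn.sh
    ```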

### Deploy the Hive Environment

- Perform the following on the **hive-X** hosts
    - Download the hive 4.0.1 distribution and unpack it
        ```bash
        curl -LO https://archive.apache.org/dist/hive/hive-4.0.1/apache-hive-4.0.1-bin.tar.gz
        tar zxf apache-hive-4.0.1-bin.tar.gz
        mv apache-hive-4.0.1-bin /opt/hive
        ```

    - Download the mysql connector and extract the jar into hive's lib directory
        ```bash
        curl -LO https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-j-8.0.33.tar.gz
        tar zxf mysql-connector-j-8.0.33.tar.gz mysql-connector-j-8.0.33/mysql-connector-j-8.0.33.jar
        mv mysql-connector-j-8.0.33/mysql-connector-j-8.0.33.jar /opt/hive/lib/
        rm -rf mysql-connector-j-8.0.33*
        ```

    - Configure the environment variables
        ```bash
        echo 'export HIVE_HOME=/opt/hive' > /etc/profile.d/hive.sh
        echo 'export PATH=$HIVE_HOME/bin:$PATH' >> /etc/profile.d/hive.sh
        source /etc/profile.d/hive.sh
        ```

    - Edit $HIVE_HOME/conf/hive-env.sh to set HADOOP_HOME and the tez libraries
        ```bash
        HADOOP_HOME=/opt/hdp
        export TEZ_HOME=/opt/tez
        export HIVE_AUX_JARS_PATH=$TEZ_HOME/lib
        export HADOOP_CLASSPATH=$TEZ_HOME:$TEZ_HOME/lib
        ```

    - Create $HIVE_HOME/conf/hive-site.xml; reference content below
        ```xml
        <configuration>
            <property>
                <!-- mysql address -->
                <name>javax.jdo.option.ConnectionURL</name>
                <value>jdbc:mysql://mysql-ip:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
            </property>
            <property>
                <!-- mysql driver -->
                <name>javax.jdo.option.ConnectionDriverName</name>
                <value>com.mysql.cj.jdbc.Driver</value>
            </property>
            <property>
                <!-- mysql user -->
                <name>javax.jdo.option.ConnectionUserName</name>
                <value>hive</value>
            </property>
            <property>
                <!-- mysql password -->
                <name>javax.jdo.option.ConnectionPassword</name>
                <value>Hive_1234</value>
            </property>
            <property>
                <!-- auto-create the hive schema -->
                <name>datanucleus.schema.autoCreateAll</name>
                <value>true</value>
            </property>
            <property>
                <name>hive.cli.print.header</name>
                <value>true</value>
            </property>
            <property>
                <name>hive.cli.print.current.db</name>
                <value>true</value>
            </property>
            <property>
                <!-- hiveserver2 web ui port -->
                <name>hive.server2.webui.port</name>
                <value>10002</value>
            </property>
            <property>
                <!-- data warehouse location (in hdfs) -->
                <name>hive.metastore.warehouse.dir</name>
                <value>/hive/warehouse</value>
            </property>
            <property>
                <!-- hive metastore address -->
                <name>hive.metastore.uris</name>
                <value>thrift://hive-ms:9083</value>
            </property>
            <property>
                <!-- use tez as hive's default execution engine -->
                <name>hive.execution.engine</name>
                <value>tez</value>
            </property>
        </configuration>
        ```

### Initialize Hive

- On **hive-ms**, initialize the mysql schema
    ```bash
    schematool -dbType mysql -initSchema
    ```
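
- To confirm the schema was created, schematool can report the current version
    ```bash
    schematool -dbType mysql -info
    ```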

### Start the Hive Cluster

- On **hive-ms**, start the hive metastore
    ```bash
    hive --service metastore
    ```

- On **hive-hs**, start the hive server
    ```bash
    hive --service hiveserver2
    ```
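
- Both commands run in the foreground; to keep the services alive after logout, one option is nohup (the log paths here are arbitrary)
    ```bash
    # On hive-ms
    nohup hive --service metastore > /var/log/hive-metastore.log 2>&1 &
    # On hive-hs
    nohup hive --service hiveserver2 > /var/log/hiveserver2.log 2>&1 &
    ```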

### Client

- Connect with beeline; this requires the proxyuser settings in $HADOOP_HOME/etc/hadoop/core-site.xml (already configured above)
    ```bash
    beeline -u jdbc:hive2://hive-hs:10000 -n root
    ```
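
- A quick smoke test that runs a statement and exits
    ```bash
    beeline -u jdbc:hive2://hive-hs:10000 -n root -e 'show databases;'
    ```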