You've already forked www.colben.cn
update
This commit is contained in:
@@ -76,7 +76,7 @@ Rocky9 | hdp-slave11 | 192.168.8.11/24 | /data/hdp-dn | Datanode, NodeManager
|
|||||||
```bash
|
```bash
|
||||||
echo 'export HADOOP_HOME=/opt/hdp' > /etc/profile.d/hdp.sh
|
echo 'export HADOOP_HOME=/opt/hdp' > /etc/profile.d/hdp.sh
|
||||||
echo 'export PATH=$HADOOP_HOME/bin:$PATH' >> /etc/profile.d/hdp.sh
|
echo 'export PATH=$HADOOP_HOME/bin:$PATH' >> /etc/profile.d/hdp.sh
|
||||||
# 不推荐把 $HADOOP_HOME/sbin 加入环境变量 PATH.
|
# 不推荐把 $HADOOP_HOME/sbin 加入环境变量 PATH,避免与 spark 冲突
|
||||||
source /etc/profile.d/hdp.sh
|
source /etc/profile.d/hdp.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -194,10 +194,15 @@ Rocky9 | hdp-slave11 | 192.168.8.11/24 | /data/hdp-dn | Datanode, NodeManager
|
|||||||
<value>604800</value>
|
<value>604800</value>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<!-- 使用 tez 时需关闭 yarn 虚拟内存检查 -->
|
<!-- 使用 spark/tez 时需关闭 yarn 虚拟内存检查 -->
|
||||||
<name>yarn.nodemanager.vmem-check-enabled</name>
|
<name>yarn.nodemanager.vmem-check-enabled</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<!-- 使用 spark 时需关闭 yarn 物理内存检查 -->
|
||||||
|
<name>yarn.nodemanager.pmem-check-enabled</name>
|
||||||
|
<value>false</value>
|
||||||
|
</property>
|
||||||
</configuration>
|
</configuration>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
100
content/post/spark.md
Normal file
100
content/post/spark.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
---
|
||||||
|
title: "spark on yarn 部署"
|
||||||
|
date: 2023-05-23T12:00:00+08:00
|
||||||
|
lastmod: 2024-07-19T17:00:00+08:00
|
||||||
|
keywords: []
|
||||||
|
tags: ["hadoop", "spark"]
|
||||||
|
categories: ["hadoop"]
|
||||||
|
---
|
||||||
|
|
||||||
|
## 环境
|
||||||
|
操作系统 | 主机名 | 地址 | 运行组件
|
||||||
|
---- | ---- | ---- | ----
|
||||||
|
Rocky9 | hdp-nn | 192.168.8.1/24 | Namenode
|
||||||
|
Rocky9 | hdp-snn | 192.168.8.2/24 | SecondaryNamenode
|
||||||
|
Rocky9 | hdp-rm | 192.168.8.3/24 | ResourceManager
|
||||||
|
Rocky9 | hdp-slave10 | 192.168.8.10/24 | Datanode, NodeManager
|
||||||
|
Rocky9 | hdp-slave11 | 192.168.8.11/24 | Datanode, NodeManager
|
||||||
|
|
||||||
|
## 前提
|
||||||
|
- [已部署好 hadoop 2.10](/post/hdp2)
|
||||||
|
|
||||||
|
## 部署 spark 环境
|
||||||
|
- 在**全部主机**上执行如下操作
|
||||||
|
- 下载 spark-3.3.4-bin-hadoop2.tgz,解压
|
||||||
|
```bash
|
||||||
|
curl -LO https://archive.apache.org/dist/spark/spark-3.3.4/spark-3.3.4-bin-hadoop2.tgz
|
||||||
|
tar zxf spark-3.3.4-bin-hadoop2.tgz
|
||||||
|
mv spark-3.3.4-bin-hadoop2 /opt/spark
|
||||||
|
```
|
||||||
|
|
||||||
|
- 配置 spark 环境变量
|
||||||
|
```bash
|
||||||
|
echo 'export SPARK_HOME=/opt/spark' > /etc/profile.d/spark.sh
|
||||||
|
echo 'export PATH=$SPARK_HOME/bin:$PATH' >> /etc/profile.d/spark.sh
|
||||||
|
# 不推荐把 $SPARK_HOME/sbin 加入环境变量 PATH,避免与 hadoop 冲突
|
||||||
|
source /etc/profile.d/spark.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 修改 yarn-site.xml
|
||||||
|
- 在**全部主机**上[关闭 yarn 虚拟/物理内存检查](/post/hdp2/#修改-yarn-sitexml)
|
||||||
|
|
||||||
|
### 修改 capacity-scheduler.xml
|
||||||
|
- 在**全部主机**上执行如下操作
|
||||||
|
- 编辑 $HADOOP_HOME/etc/hadoop/capacity-scheduler.xml,修改内容如下
|
||||||
|
```xml
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>yarn.scheduler.capacity.resource-calculator</name>
|
||||||
|
<!-- <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> -->
|
||||||
|
<value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
|
||||||
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 创建 spark-defaults.conf
|
||||||
|
- 在**全部主机**上执行如下操作
|
||||||
|
- 创建 $SPARK_HOME/conf/spark-defaults.conf,参考内容如下
|
||||||
|
```
|
||||||
|
spark.master yarn
|
||||||
|
spark.eventLog.enabled true
|
||||||
|
spark.eventLog.dir hdfs://hdp-nn:8020/spark-logs
|
||||||
|
```
|
||||||
|
|
||||||
|
### 修改 spark-env.sh
|
||||||
|
- 在**全部主机**上执行如下操作
|
||||||
|
- 创建 $SPARK_HOME/conf/spark-env.sh,参考内容如下
|
||||||
|
```bash
|
||||||
|
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
|
||||||
|
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdp-nn:8020/spark-logs -Dspark.history.retainedApplications=30"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 创建 spark 日志目录
|
||||||
|
- 在 **hdp-nn** 上创建 spark 日志目录
|
||||||
|
```bash
|
||||||
|
hdfs dfs -mkdir /spark-logs
|
||||||
|
```
|
||||||
|
|
||||||
|
## 启动 spark 日志服务
|
||||||
|
- 在**任一主机**上启动日志服务
|
||||||
|
```bash
|
||||||
|
/opt/spark/sbin/start-history-server.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## 发布任务
|
||||||
|
- 客户端模式
|
||||||
|
```bash
|
||||||
|
spark-shell
|
||||||
|
```
|
||||||
|
|
||||||
|
- 集群模式
|
||||||
|
```bash
|
||||||
|
spark-submit \
|
||||||
|
--class org.apache.spark.examples.SparkPi \
|
||||||
|
--deploy-mode cluster \
|
||||||
|
$SPARK_HOME/examples/jars/spark-examples_2.12-3.3.4.jar
|
||||||
|
```
|
||||||
|
|
||||||
|
- 浏览器访问 http://{spark 日志服务器}:18080 查看任务进度
|
||||||
|
|
||||||
Reference in New Issue
Block a user