You've already forked www.colben.cn
update
This commit is contained in:
@@ -233,7 +233,7 @@ hbase-rs1 | 192.168.8.33/24 | - | HbaseRegionServer
|
||||
hdp-slave1
|
||||
```
|
||||
|
||||
### 部署 spark 集群
|
||||
### 部署 spark-on-yarn
|
||||
- 在 **hdp-X** 上执行如下操作
|
||||
- 下载 spark-3.3.4-bin-hadoop2.tgz,解压
|
||||
```bash
|
||||
@@ -272,8 +272,10 @@ hbase-rs1 | 192.168.8.33/24 | - | HbaseRegionServer
|
||||
|
||||
- 创建 $SPARK_HOME/conf/spark-env.sh,参考内容如下
|
||||
```bash
|
||||
export JAVA_HOME=/opt/jdk
|
||||
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
|
||||
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdp-nn:8020/spark-logs -Dspark.history.retainedApplications=30"
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native
|
||||
```
|
||||
|
||||
### 格式化 namenode
|
||||
|
||||
@@ -10,11 +10,11 @@ categories: ["hadoop"]
|
||||
## 环境
|
||||
主机名 | 地址 | 数据目录 | 组件
|
||||
---- | ---- | ---- | ----
|
||||
hdp-nn | 192.168.8.1/24 | /data/hdp-nn | Namenode
|
||||
hdp-nn | 192.168.8.1/24 | /data/hdp-nn | Namenode Spark
|
||||
hdp-snn | 192.168.8.2/24 | /data/hdp-snn | SecondaryNamenode
|
||||
hdp-rm | 192.168.8.3/24 | - | ResourceManager
|
||||
hdp-worker0 | 192.168.8.10/24 | /data/hdp-dn | Datanode NodeManager
|
||||
hdp-worker1 | 192.168.8.11/24 | /data/hdp-dn | Datanode NodeManager
|
||||
hdp-worker0 | 192.168.8.10/24 | /data/hdp-dn | Datanode NodeManager Spark
|
||||
hdp-worker1 | 192.168.8.11/24 | /data/hdp-dn | Datanode NodeManager Spark
|
||||
hive-hs | 192.168.8.20/24 | - | HiveServer2 Tez
|
||||
hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez
|
||||
|
||||
@@ -241,6 +241,52 @@ hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez
|
||||
hdp-worker1
|
||||
```
|
||||
|
||||
|
||||
### 部署 spark-on-yarn
|
||||
- 在 **hdp-X** 上执行如下操作
|
||||
- 下载 spark-3.5.8-bin-hadoop3.tgz,解压
|
||||
```bash
|
||||
curl -LO https://archive.apache.org/dist/spark/spark-3.5.8/spark-3.5.8-bin-hadoop3.tgz
|
||||
tar zxf spark-3.5.8-bin-hadoop3.tgz
|
||||
mv spark-3.5.8-bin-hadoop3 /opt/spark
|
||||
```
|
||||
|
||||
- 配置 spark 环境变量
|
||||
```bash
|
||||
echo 'export SPARK_HOME=/opt/spark' > /etc/profile.d/spark.sh
|
||||
echo 'export PATH=$SPARK_HOME/bin:$PATH' >> /etc/profile.d/spark.sh
|
||||
# 不推荐把 $SPARK_HOME/sbin 加入环境变量 PATH,避免与 hadoop 冲突
|
||||
source /etc/profile.d/spark.sh
|
||||
```
|
||||
|
||||
- 修改 $HADOOP_HOME/etc/hadoop/yarn-site.xml,关闭 yarn 虚拟内存检查(已关闭)
|
||||
- 编辑 $HADOOP_HOME/etc/hadoop/capacity-scheduler.xml,修改内容如下
|
||||
```xml
|
||||
<configuration>
|
||||
<property>
|
||||
<name>yarn.scheduler.capacity.resource-calculator</name>
|
||||
<!-- <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> -->
|
||||
<value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
|
||||
</property>
|
||||
</configuration>
|
||||
```
|
||||
|
||||
- 创建 $SPARK_HOME/conf/spark-defaults.conf,参考内容如下
|
||||
```
|
||||
spark.master yarn
|
||||
spark.eventLog.enabled true
|
||||
spark.eventLog.dir hdfs://hdp-nn:8020/spark-logs
|
||||
```
|
||||
|
||||
- 创建 $SPARK_HOME/conf/spark-env.sh,参考内容如下
|
||||
```bash
|
||||
export JAVA_HOME=/opt/jdk
|
||||
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
|
||||
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdp-nn:8020/spark-logs -Dspark.history.retainedApplications=30"
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native
|
||||
```
|
||||
|
||||
### 格式化 namenode
|
||||
- 在 **hdp-nn** 上执行如下操作
|
||||
```bash
|
||||
@@ -263,6 +309,34 @@ hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez
|
||||
/opt/jdk/bin/jps
|
||||
```
|
||||
|
||||
### 启动 spark 日志服务
|
||||
- 在 **hdp-nn** 上执行如下操作
|
||||
- 创建 spark 日志目录
|
||||
```bash
|
||||
hdfs dfs -mkdir /spark-logs
|
||||
```
|
||||
|
||||
- 启动日志服务
|
||||
```bash
|
||||
/opt/spark/sbin/start-history-server.sh
|
||||
```
|
||||
|
||||
### 发布分布式计算任务
|
||||
- 客户端模式
|
||||
```bash
|
||||
spark-shell
|
||||
```
|
||||
|
||||
- 集群模式
|
||||
```bash
|
||||
spark-submit \
|
||||
--class org.apache.spark.examples.SparkPi \
|
||||
--deploy-mode cluster \
|
||||
$SPARK_HOME/examples/jars/spark-examples_2.12-3.5.8.jar
|
||||
```
|
||||
|
||||
- 浏览器访问 http://{spark 日志服务器}:18080 查看任务进度
|
||||
|
||||
---
|
||||
|
||||
## 部署 hive 集群
|
||||
|
||||
Reference in New Issue
Block a user