update
@@ -233,7 +233,7 @@ hbase-rs1 | 192.168.8.33/24 | - | HbaseRegionServer
hdp-slave1
```

-### Deploy the spark cluster
+### Deploy spark-on-yarn

- Perform the following operations on **hdp-X**

- Download spark-3.3.4-bin-hadoop2.tgz and extract it

```bash
@@ -272,8 +272,10 @@ hbase-rs1 | 192.168.8.33/24 | - | HbaseRegionServer

- Create $SPARK_HOME/conf/spark-env.sh with the following reference content

```bash
+export JAVA_HOME=/opt/jdk
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdp-nn:8020/spark-logs -Dspark.history.retainedApplications=30"
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native
```

### Format the namenode
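Note: the `LD_LIBRARY_PATH` export added above points spark at hadoop's native libraries. A quick way to confirm they actually load, assuming the `hadoop` CLI from the existing deployment is on PATH:

```bash
# Verify hadoop native libraries (zlib, snappy, etc.) are detected
hadoop checknative -a
```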
@@ -10,11 +10,11 @@ categories: ["hadoop"]
## Environment

Hostname | Address | Data directory | Components
---- | ---- | ---- | ----
-hdp-nn | 192.168.8.1/24 | /data/hdp-nn | Namenode
+hdp-nn | 192.168.8.1/24 | /data/hdp-nn | Namenode Spark
hdp-snn | 192.168.8.2/24 | /data/hdp-snn | SecondaryNamenode
hdp-rm | 192.168.8.3/24 | - | ResourceManager
-hdp-worker0 | 192.168.8.10/24 | /data/hdp-dn | Datanode NodeManager
+hdp-worker0 | 192.168.8.10/24 | /data/hdp-dn | Datanode NodeManager Spark
-hdp-worker1 | 192.168.8.11/24 | /data/hdp-dn | Datanode NodeManager
+hdp-worker1 | 192.168.8.11/24 | /data/hdp-dn | Datanode NodeManager Spark
hive-hs | 192.168.8.20/24 | - | HiveServer2 Tez
hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez
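Note: the steps that follow assume every hostname in this table resolves from every node. A minimal sketch of matching /etc/hosts entries, derived from the table above (skip it if DNS already covers these names):

```bash
# Hypothetical name resolution for the cluster, taken from the environment table
cat >> /etc/hosts <<'EOF'
192.168.8.1   hdp-nn
192.168.8.2   hdp-snn
192.168.8.3   hdp-rm
192.168.8.10  hdp-worker0
192.168.8.11  hdp-worker1
192.168.8.20  hive-hs
192.168.8.21  hive-ms
EOF
```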
@@ -241,6 +241,52 @@ hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez
hdp-worker1
```

+### Deploy spark-on-yarn
+
+- Perform the following operations on **hdp-X**
+
+- Download spark-3.5.8-bin-hadoop3.tgz and extract it
+
+```bash
+curl -LO https://archive.apache.org/dist/spark/spark-3.5.8/spark-3.5.8-bin-hadoop3.tgz
+tar zxf spark-3.5.8-bin-hadoop3.tgz
+mv spark-3.5.8-bin-hadoop3 /opt/spark
+```
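Note: archive.apache.org publishes checksum files next to each release; verifying the download is optional but cheap. The `.sha512` URL below is an assumption following Apache's usual layout:

```bash
# Optional integrity check of the downloaded tarball
curl -LO https://archive.apache.org/dist/spark/spark-3.5.8/spark-3.5.8-bin-hadoop3.tgz.sha512
sha512sum spark-3.5.8-bin-hadoop3.tgz   # compare with the value in the .sha512 file
```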
+
+- Configure spark environment variables
+
+```bash
+echo 'export SPARK_HOME=/opt/spark' > /etc/profile.d/spark.sh
+echo 'export PATH=$SPARK_HOME/bin:$PATH' >> /etc/profile.d/spark.sh
+# Adding $SPARK_HOME/sbin to PATH is not recommended, to avoid conflicts with hadoop scripts
+source /etc/profile.d/spark.sh
+```
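Note: a quick sanity check that the new PATH entry took effect in the current shell:

```bash
# Should print the spark version banner if /etc/profile.d/spark.sh was sourced
spark-submit --version
```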
+
+- Modify $HADOOP_HOME/etc/hadoop/yarn-site.xml to disable the yarn virtual memory check (already disabled here)
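Note: the virtual-memory check referred to above is the standard `yarn.nodemanager.vmem-check-enabled` property; a quick grep confirms it is already set to false, assuming it was configured during the earlier hadoop deployment:

```bash
# Confirm the yarn virtual memory check is disabled
grep -A1 'vmem-check-enabled' $HADOOP_HOME/etc/hadoop/yarn-site.xml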
+
+- Edit $HADOOP_HOME/etc/hadoop/capacity-scheduler.xml, changing it as follows
+
+```xml
+<configuration>
+    <property>
+        <name>yarn.scheduler.capacity.resource-calculator</name>
+        <!-- <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> -->
+        <value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
+    </property>
+</configuration>
+```

Note: DefaultResourceCalculator schedules on memory alone; DominantResourceCalculator also accounts for vcores, which matters once spark executors request multiple cores.

+
+- Create $SPARK_HOME/conf/spark-defaults.conf with the following reference content
+
+```
+spark.master yarn
+spark.eventLog.enabled true
+spark.eventLog.dir hdfs://hdp-nn:8020/spark-logs
+```
+
+- Create $SPARK_HOME/conf/spark-env.sh with the following reference content
+
+```bash
+export JAVA_HOME=/opt/jdk
+export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
+export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdp-nn:8020/spark-logs -Dspark.history.retainedApplications=30"
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native
+```
+

### Format the namenode

- Perform the following operations on **hdp-nn**

```bash
@@ -263,6 +309,34 @@ hive-ms | 192.168.8.21/24 | - | HiveMetastore Tez
/opt/jdk/bin/jps
```

+### Start the spark history service
+
+- Perform the following operations on **hdp-nn**
+
+- Create the spark log directory
+
+```bash
+hdfs dfs -mkdir /spark-logs
+```
+
+- Start the history service
+
+```bash
+/opt/spark/sbin/start-history-server.sh
+```
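Note: a quick way to confirm the history server came up, run on hdp-nn; the REST endpoint below is spark's standard monitoring API:

```bash
# The HistoryServer JVM should be listed, and the REST API should answer
/opt/jdk/bin/jps | grep HistoryServer
curl -s http://hdp-nn:18080/api/v1/applications
```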
+
+### Submit distributed compute jobs
+
+- Client mode
+
+```bash
+spark-shell
+```
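Note: a minimal client-mode smoke test, fed to spark-shell on stdin; it relies on spark.master being set to yarn in spark-defaults.conf, and the computation itself is just an illustration:

```bash
# Runs a trivial job on yarn in client mode and prints the sum of 1..1000
spark-shell <<'EOF'
println(sc.parallelize(1 to 1000).sum())
EOF
```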
+
+- Cluster mode
+
+```bash
+spark-submit \
+    --class org.apache.spark.examples.SparkPi \
+    --deploy-mode cluster \
+    $SPARK_HOME/examples/jars/spark-examples_2.12-3.5.8.jar
+```
+
+- Open http://{spark history server}:18080 in a browser to check job progress
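Note: besides the web UI, job state can also be checked from the command line with the standard yarn CLI:

```bash
# List applications yarn has accepted, running, or finished
yarn application -list -appStates ALL
```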
---

## Deploy the hive cluster