当前位置: 首页 > article >正文

Kubernetes运行大数据组件-运行spark

部署组件

● spark-historyserver
● spark-client

配置文件

---
# ConfigMap "spark": carries the shared spark-defaults.conf.
# The Deployments in this article mount the single key below, via subPath,
# at /opt/spark/conf/spark-defaults.conf.
apiVersion: v1
kind: ConfigMap
metadata:
  name: spark
data:
  # Literal block with the trailing newline stripped (|-). The content is
  # handed to Spark verbatim, so nothing inside it may be reformatted.
  # Event logging is written to, and the history server reads from, the
  # same HDFS directory (/eventLogs).
  spark-defaults.conf: |-
    spark.eventLog.enabled            true
    spark.eventLog.dir                hdfs://192.168.199.56:8020/eventLogs
    spark.eventLog.compress           true
    spark.serializer                  org.apache.spark.serializer.KryoSerializer
    spark.master                      yarn
    spark.driver.cores                1
    spark.driver.memory               2g
    spark.executor.cores              1
    spark.executor.memory             2g
    spark.executor.instances          1
    spark.sql.warehouse.dir           hdfs://192.168.199.56:8020/user/hive/warehouse
    spark.yarn.historyServer.address  192.168.199.58:18080
    spark.history.ui.port             18080
    spark.history.fs.logDirectory     hdfs://192.168.199.56:8020/eventLogs

spark-historyserver部署文件

---
# Deployment "spark-historyserver": a single replica of the Spark History
# Server, running on the host network of a node labelled
# spark-historyserver=true.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: spark-historyserver
  labels:
    app: spark-historyserver
spec:
  selector:
    matchLabels:
      app: spark-historyserver
  replicas: 1
  template:
    metadata:
      labels:
        app: spark-historyserver
    spec:
      initContainers:
        # Make sure the HDFS event-log directory exists before the server
        # starts. The path must be the one configured as spark.eventLog.dir /
        # spark.history.fs.logDirectory in the "spark" ConfigMap
        # (hdfs://192.168.199.56:8020/eventLogs); the previous script created
        # /sparklogs, which nothing else in this setup reads or writes.
        - name: init-sparklogs
          image: spark:3.0.2
          imagePullPolicy: IfNotPresent
          command:
            - "sh"
            - "-c"
            # `-test -d` + `-mkdir -p` is idempotent; a failing mkdir still
            # fails the init container, so the pod does not start without the
            # directory.
            - |-
              log_dir=hdfs://192.168.199.56:8020/eventLogs
              hadoop fs -test -d "$log_dir" ||
                { hadoop fs -mkdir -p "$log_dir" && echo "Create $log_dir"; }
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: hadoop-config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      containers:
        - name: spark-historyserver
          image: spark:3.0.2
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 2Gi
          command:
            - "sh"
            - "-c"
            # The start script launches the server in the background; tailing
            # the log files keeps the container's main process alive.
            - "$SPARK_HOME/sbin/start-history-server.sh && tail -f $SPARK_HOME/logs/*"
          # Because `tail -f` keeps running even if the history server itself
          # exits, probe the UI port (spark.history.ui.port = 18080) so that a
          # dead server is detected and the container is restarted.
          readinessProbe:
            tcpSocket:
              port: 18080
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: 18080
            initialDelaySeconds: 60
            periodSeconds: 20
            failureThreshold: 3
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: hadoop-config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: hadoop-config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: hive-config
              mountPath: /opt/spark/conf/hive-site.xml
              subPath: hive-site.xml
            - name: spark-config
              mountPath: /opt/spark/conf/spark-defaults.conf
              subPath: spark-defaults.conf
          lifecycle:
            # Stop the background server through its own stop script before
            # the container is terminated.
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "$SPARK_HOME/sbin/stop-history-server.sh"
      volumes:
        # Host zoneinfo file mounted over /etc/localtime (Asia/Shanghai).
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: hadoop-config
          configMap:
            name: hadoop
        - name: hive-config
          configMap:
            name: hive
        - name: spark-config
          configMap:
            name: spark
      restartPolicy: Always
      hostNetwork: true
      # Static /etc/hosts entries for the three bigdata hosts.
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      # Schedule only onto nodes labelled spark-historyserver=true, and
      # tolerate the bigdata=true:NoSchedule taint.
      nodeSelector:
        spark-historyserver: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

spark-client部署文件

---
# Deployment "spark-client": one pod carrying the Spark client image together
# with the Hadoop, Hive and Spark configuration files.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: spark-client
  labels:
    app: spark-client
spec:
  replicas: 1
  selector:
    matchLabels:
      app: spark-client
  template:
    metadata:
      labels:
        app: spark-client
    spec:
      restartPolicy: Always
      hostNetwork: true
      # Placement: only nodes labelled spark=true; the bigdata=true:NoSchedule
      # taint is tolerated.
      nodeSelector:
        spark: "true"
      tolerations:
        - key: bigdata
          operator: Equal
          value: "true"
          effect: NoSchedule
      # Static /etc/hosts entries for the three bigdata hosts.
      hostAliases:
        - ip: "192.168.199.56"
          hostnames: [bigdata199056]
        - ip: "192.168.199.57"
          hostnames: [bigdata199057]
        - ip: "192.168.199.58"
          hostnames: [bigdata199058]
      volumes:
        # Host zoneinfo file mounted over /etc/localtime (Asia/Shanghai).
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        # Configuration files come from three ConfigMaps.
        - name: hadoop-config
          configMap:
            name: hadoop
        - name: hive-config
          configMap:
            name: hive
        - name: spark-config
          configMap:
            name: spark
      containers:
        # No command/args are set here, so whatever the image defines is
        # what runs.
        - name: spark-client
          image: spark:3.0.2
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 2Gi
          # Each config file is mounted individually via subPath.
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: hadoop-config
              subPath: core-site.xml
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
            - name: hadoop-config
              subPath: hdfs-site.xml
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
            - name: hadoop-config
              subPath: yarn-site.xml
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
            - name: hadoop-config
              subPath: mapred-site.xml
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
            - name: hive-config
              subPath: hive-site.xml
              mountPath: /opt/spark/conf/hive-site.xml
            - name: spark-config
              subPath: spark-defaults.conf
              mountPath: /opt/spark/conf/spark-defaults.conf

注意:spark-client一般是集成到应用服务中使用。

部署historyserver和client

> kubectl.exe apply -f .\spark\
deployment.apps/spark-client created
configmap/spark created
deployment.apps/spark-historyserver created

spark-historyserver日志和web服务

(原文此处为三张截图,展示 spark-historyserver 的日志和 Web 页面;图片在转载时丢失。)

通过spark-client提交测试任务

(原文此处为两张截图,展示通过 spark-client 提交测试任务的过程;图片在转载时丢失。)

yarn中查看spark任务

(原文此处为一张截图,展示在 yarn 中查看到的 spark 任务;图片在转载时丢失。)

spark-historyserver中查看任务

(原文此处为两张截图,展示在 spark-historyserver 中查看到的任务;图片在转载时丢失。)


http://www.kler.cn/a/378442.html

相关文章:

  • 3.CSS的背景
  • AI 编程工具—Cursor进阶使用 Rules for AI
  • 国产工作平替软件推荐
  • Linux搭建FTP详细流程
  • STM32+W5500+以太网应用开发+003_TCP服务器添加OLED(u8g2)显示状态
  • WebSocket 和 Socket 的区别
  • element plus中修改el-table的样式
  • JAVA语言多态和动态语言实现原理
  • 深度学习:反向传播算法简介
  • 一体化运维监控管理平台详解:构建高效运维体系
  • 如何通过OpenAI Gym学习强化学习
  • 乡村景区一体化系统(门票,餐饮,便利店,果园,娱乐,停车收费
  • 两个壁面之间夹一个圆柱形杆的温度分布
  • LeetCode 684.冗余连接:拓扑排序+哈希表(O(n)) 或 并查集(O(nlog n)-O(nα(n)))
  • 使用GetX实现GetPage中间件
  • WordPress在windows下安装
  • 【Git】从 GitHub 仓库中移除误提交的 IntelliJ IDEA 配置文件夹 .idea 并将其添加到 .gitignore 文件中
  • MyBatis-Plus快速入门:从安装到第一个Demo
  • React Native 0.76 重大更新:新架构全面启用
  • 基于Python的自然语言处理系列(47):DistilBERT:更小、更快、更省、更轻的BERT版本
  • C++编程法则365天一天一条(344)理解std::optional的设计初衷
  • 数据库日志分析 ApexSQLLog
  • 基于SSM+VUE历史车轮网站JAVA|VUE|Springboot计算机毕业设计源代码+数据库+LW文档+开题报告+答辩稿+部署教+代码讲解
  • Zypher Network:全栈式 Web3 游戏引擎,服务器抽象叙事的领导者
  • 2.若依vue表格数据根据不同状态显示不同颜色style
  • 【Flask框架】10、Flask项目拆分规范