当前位置: 首页 > article >正文

xxl-job 整合 Seatunnel 实现定时任务

流处理

#!/bin/bash
SEATUNNEL_CMD="$SEATUNNEL_HOME/bin/seatunnel.sh"
SEATUNNEL_HOST=localhost
SEATUNNEL_PORT=5801

# 定义任务停止时执行的清理操作
exit_func() {
    # 在这里放入你希望在任务停止时执行的操作,比如释放资源、记录日志等
	$SEATUNNEL_CMD -can "$JOB_ID"
    exit;
}

# 捕获 SIGINT (Ctrl+C) 和 SIGTERM (手动终止) 信号
trap exit_func SIGINT SIGTERM SIGHUP SIGQUIT SIGKILL

# 将配置内容写入变量
config_content=$(cat <<EOL
env {
    "job.mode"=STREAMING
    "job.name"="SeaTunnel_Job"
    "savemode.execute.location"=CLUSTER
}
source {
    MySQL-CDC {
        "snapshot.split.size"="8096"
        "snapshot.fetch.size"="1024"
        "incremental.parallelism"="1"
        "connect.timeout.ms"="30000"
        "connect.max-retries"="3"
        "connection.pool.size"="20"
        "chunk-key.even-distribution.factor.lower-bound"="0.05"
        "chunk-key.even-distribution.factor.upper-bound"="100.0"
        "sample-sharding.threshold"="1000"
        "inverse-sampling.rate"="1000"
        "startup.mode"=INITIAL
        "exactly_once"="false"
        "stop.mode"=NEVER
        parallelism="1"
        "result_table_name"=Table15381274549824
        catalog {
            factory=Mysql
        }
        database-names=[
            "test_source"
        ]
        table-names=[
            "test_source.user"
        ]
        format=DEFAULT
        password="123456"
        username=root
        base-url="jdbc:mysql://127.0.0.1:3306/test_cdc"
        server-time-zone=UTC
    }
}
transform {
}
sink {
    Jdbc {
        "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST"
        "data_save_mode"="APPEND_DATA"
        "create_index"="true"
        "connection_check_timeout_sec"="30"
        "batch_size"="1000"
        "is_exactly_once"="false"
        "max_commit_attempts"="3"
        "transaction_timeout_sec"="-1"
        "max_retries"="0"
        "auto_commit"="true"
        "support_upsert_by_query_primary_key_exist"="false"
        "multi_table_sink_replica"="1"
        "source_table_name"=Table15381274549824
        "generate_sink_sql"=true
        database="test_jdbc"
        table=user
        driver="com.mysql.cj.jdbc.Driver"
        url="jdbc:mysql://127.0.0.1:3306/test_jdbc"
        password="123456"
        user=root
    }
}
EOL
)

echo "开始执行任务"
echo "--------    配置信息    --------------"
echo "$config_content"
echo "--------    end    --------------"

# 将配置内容写入标准输入并传递给 SeaTunnel
SUBJOB_OUTPUT=$(echo "$config_content" | $SEATUNNEL_CMD --config /dev/stdin --async 2>&1)
JOB_ID=$(echo "$SUBJOB_OUTPUT" | grep "job name" | awk -F'job id: ' '{print $2}' | awk -F',' '{print $1}')

echo "任务Id: $JOB_ID"

# 监控任务状态
while true; do
    STATUS_OUTPUT=$(curl -s http://$SEATUNNEL_HOST:$SEATUNNEL_PORT/hazelcast/rest/maps/job-info/$JOB_ID)
    echo $(date "+%Y-%m-%d %H:%M:%S.%3N") "写入数量 : "$(echo "$STATUS_OUTPUT" | awk -F'"SinkWriteCount":"' '{print $2}' | awk -F '","' '{print $1}')", 读取数量 :"$(echo "$STATUS_OUTPUT" | awk -F'"SourceReceivedCount":"' '{print $2}' | awk -F '","' '{print $1}')
    
	TASK_STATE=$(echo "$STATUS_OUTPUT" | awk -F'"jobStatus":"' '{print $2}' | awk -F '","' '{print $1}')

    if [[ "$TASK_STATE" == "FINISHED" ]]; then
        echo "任务完成, 状态: $TASK_STATE"
        exit 0
    fi
    
    if [[ "$TASK_STATE" != "RUNNING" ]]; then
        echo "任务已结束,状态:$TASK_STATE"
        exit 1
    else
        echo "任务运行中 ... 状态: $TASK_STATE"
        sleep 300
    fi
done



批处理

#!/bin/bash

SEATUNNEL_CMD="$SEATUNNEL_HOME/bin/seatunnel.sh"

# 定义任务停止时执行的清理操作
exit_func() {
    # 在这里放入你希望在任务停止时执行的操作,比如释放资源、记录日志等
	$SEATUNNEL_CMD -can "$JOB_ID"
    exit;
}

# 捕获 SIGINT (Ctrl+C) 和 SIGTERM (手动终止) 信号
trap exit_func SIGINT SIGTERM SIGHUP SIGQUIT SIGKILL


# 将配置内容写入变量
config_content=$(cat <<EOL
env {
  # You can set SeaTunnel environment configuration here
  parallelism = 2
  job.mode = "BATCH"
  checkpoint.interval = 10000
}

source {
  # This is a example source plugin **only for test and demonstrate the feature source plugin**
  FakeSource {
    parallelism = 2
    result_table_name = "fake"
    row.num = 16
    schema = {
      fields {
        name = "string"
        age = "int"
      }
    }
  }

  # If you would like to get more information about how to configure SeaTunnel and see full list of source plugins,
  # please go to https://seatunnel.apache.org/docs/connector-v2/source
}

sink {
  Console {
  }

  # If you would like to get more information about how to configure SeaTunnel and see full list of sink plugins,
  # please go to https://seatunnel.apache.org/docs/connector-v2/sink
}
EOL
)

echo "开始执行任务"
# 将配置内容写入标准输入并传递给 SeaTunnel
SUBJOB_OUTPUT=$(echo "$config_content" | $SEATUNNEL_CMD --config /dev/stdin --async 2>&1)
JOB_ID=$(echo "$SUBJOB_OUTPUT" | grep "job name" | awk -F'job id: ' '{print $2}' | awk -F',' '{print $1}')

echo "任务Id: $JOB_ID"

# 监控任务状态
while true; do
    # 查询任务状态
    STATUS_OUTPUT=$($SEATUNNEL_CMD -j "$JOB_ID" 2>&1)
    TASK_STATE=$(echo "$STATUS_OUTPUT" | grep "$JOB_ID" | awk -F'"jobStatus":"' '{print $2}' | awk -F '","' '{print $1}')

    if [[ "$TASK_STATE" == "FINISHED" ]]; then
        echo "任务完成, 状态: $TASK_STATE"
        exit 0
    fi
    # 检查任务是否已完成
    if [[ "$TASK_STATE" != "RUNNING" ]]; then
        echo "任务已结束,状态:$TASK_STATE"
        exit 1
    else
        echo "任务运行中 ... 状态: $TASK_STATE"
        # 等待 5 秒后再次查询
        sleep 5
    fi
done

http://www.kler.cn/a/442354.html

相关文章:

  • Bootstrap Blazor中使用PuppeteerSharp对HTML截图
  • 【嵌入式——QT】QT多线程编程
  • Halcon中dots_image(Operator)算子原理及应用详解
  • JumpServer开源堡垒机搭建及使用
  • 基于yolov10的遥感影像目标检测系统,支持图像检测,视频检测和实时摄像检测功能(pytorch框架,python源码)
  • 计算机网络技术基础:3.计算机网络的拓扑结构
  • 从零开始学TiDB(5)Raft与Multi Raft
  • 【Java】使用RSA进行数字签名详解(复制即用,内有详细注释)
  • 用 Python 从零开始创建神经网络(十七):回归(Regression)
  • 小程序转uniapp之setData
  • RabbitMQ镜像队列机制
  • 【WRF教程第3.4期】预处理系统 WPS 详解:以4.5版本为例
  • python IO编程:序列化
  • android 计算CRC
  • Windows开机黑屏|Windows开机黑屏只有鼠标|Windows开机不运行explorer
  • vue3实现商城系统详情页(前端实现)
  • 面试真题 | 虎牙 C++[20241218]
  • 5个小型多模态AI模型及其功能
  • 使用idea创建一个JAVA WEB项目
  • 小程序子组件调用父组件方法、父组件调用子组件方法