k8s下部署ansible进行node-export二安装
下载node-exporter
https://github.com/prometheus/node_exporter/releases
下载ansible
https://github.com/ansible/ansible/releases
启动node-exporter service文件
node-exporter.service
[Unit]
Description=node_exporter
Requires=network.target remote-fs.target
After=network.target remote-fs.target
[Service]
Type=simple
ExecStart=/usr/local/bin/node_exporter --web.listen-address=:9100
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
RestartSec=5s
[Install]
WantedBy=multi-user.target
ansbile的hosts文件
[node-exporter]
192.168.56.123 ansible_ssh_port=22 ansible_ssh_user="root" ansible_ssh_pass="root" ansible_sudo_pass="root"
192.168.56.124 ansible_ssh_port=22 ansible_ssh_user="root" ansible_ssh_pass="root" ansible_sudo_pass="root"
远程普通用户若要使用 sudo 权限,需要在 /etc/ansible/ansible.cfg 进行配置,若远程是root用户则可跳过。
[privilege_escalation]
become=True
become_method=sudo
ansible的deploy文件
ansible-deploy.yaml
---
- hosts: node-exporter
become: yes
tasks:
- name: "Check if node-exporter is installed"
command: systemctl status node-exporter
register: node_exporter_check
ignore_errors: true
- name: "Print node-exporter status"
debug:
msg: "Node exporter is {{ 'running' if 'active (running)' in node_exporter_check.stdout else 'not running' }}"
- name: "Download and install node-exporter dir"
block:
- name: "copy node-exporter"
copy:
src: "{{item.src}}"
dest: "{{item.dest}}"
mode: 0755
with_items:
- {src: "{{ playbook_dir }}/node-exporter", dest: "/tmp/"}
- name: "copy node-exporter"
shell: sudo cp /tmp/node-exporter/node_exporter /usr/local/bin
notify:
- reload
- name: "copy node-exporter.service"
shell: sudo cp /tmp/node-exporter/node-exporter.service /usr/lib/systemd/system/
notify:
- reload
- name: "start and enable server"
shell: sudo systemctl start node-exporter.service && sudo systemctl enable node-exporter.service
when: node_exporter_check.rc != 0
handlers:
- name: reload
shell: sudo systemctl daemon-reload
---
- hosts: all
become: yes
serial: 30
gather_facts: yes # 如果你遇到locale问题,可能需要先解决或者临时设置为no
environment:
LC_ALL: C # 或者选择其他适合的值,比如 'en_US.UTF-8'
LANG: C # 确保与LC_ALL一致,避免冲突
tasks:
- name: "Check if node_exporter.service file exists"
stat:
path: /etc/systemd/system/node_exporter.service
register: node_exporter_file_check
- name: "Check if node_exporter.service file exists"
debug:
msg: "node_exporter.service is exists."
when: node_exporter_file_check.stat.exists
- name: "Check if node_exporter.service file exists"
shell: sudo systemctl disable node_exporter.service && sudo systemctl stop node_exporter.service && sudo rm -rf /etc/systemd/system/node_exporter.service
notify:
- reload
when: node_exporter_file_check.stat.exists
- name: "Check if node-exporter is installed with node-exporter"
command: systemctl status node-exporter
register: node_exporter_check
ignore_errors: true
- name: "Check if node-exporter is installed with node_exporter"
command: systemctl status node_exporter
register: node_exporter_check_ext
ignore_errors: true
- name: "Print node-exporter status running"
debug:
msg: "Node exporter is {{ 'running' if ( 'active (running)' in node_exporter_check.stdout or 'active (running)' in node_exporter_check_ext.stdout ) else 'not running' }}"
- name: "do install node-exporter"
block:
- name: "copy node-exporter"
copy:
src: "{{item.src}}"
dest: "{{item.dest}}"
mode: 0755
with_items:
- {src: "{{ playbook_dir }}/node-exporter", dest: "/tmp/"}
- name: "copy node-exporter"
shell: sudo cp /tmp/node-exporter/node_exporter /usr/local/bin
notify:
- reload
- name: "copy node-exporter.service"
shell: sudo cp /tmp/node-exporter/node-exporter.service /usr/lib/systemd/system/
notify:
- reload
- name: "start and enable server"
shell: sudo systemctl start node-exporter.service && sudo systemctl enable node-exporter.service
when: node_exporter_check.rc != 0 and node_exporter_check_ext.rc != 0
handlers:
- name: reload
shell: sudo systemctl daemon-reload
部署node-exporter
#!/bin/bash
#SOURCE_FILE="/etc/ansible/hosts"
## 备份文件存放目录
#BACKUP_DIR="/etc/ansible/backup"
#
## 删除超过的天数
#DAYS=10
## 使用 find 查找并删除10天前的备份文件
#find "$BACKUP_DIR" -type f -name "hosts_backup_*.bak" -mtime +$((DAYS-1)) -exec rm -f {} \;
#echo "Deleted backup files older than $DAYS days in $BACKUP_DIR"
#
## 检查目录是否存在
#if [ -d "$BACKUP_DIR" ]; then
# echo "Directory exists."
#else
# echo "Directory does not exist, create it."
# # 确保备份目录存在:创建目录
# mkdir -p "$BACKUP_DIR"
#fi
#
## 获取当前时间戳,格式为 YYYYMMDD_HHMMSS
#TIMESTAMP=$(date +%Y%m%d_%H%M%S)
## 备份文件
#cp "$SOURCE_FILE" "${BACKUP_DIR}/hosts_backup_${TIMESTAMP}.bak"
#echo "Backup created with timestamp: ${TIMESTAMP}"
#rm -rf "$SOURCE_FILE"
#cp hosts "/etc/ansible"
# 获取当前工作目录
CURRENT_DIR=$(pwd)
# 获取当前时间戳,格式为 YYYYMMDD_HHMMSS
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
# 检查某个子目录是否存在
HOSTS_FILE="$CURRENT_DIR/hosts"
LOG_DIR="$CURRENT_DIR/logs"
# 检查目录是否存在
if [ -d "$LOG_DIR" ]; then
echo "Directory exists."
else
echo "Directory does not exist, create it."
# 确保备份目录存在:创建目录
mkdir -p "$LOG_DIR"
fi
LOG_FILE="${LOG_DIR}/ansible_${TIMESTAMP}.log"
ansible-playbook -i "$HOSTS_FILE" ansible-deploy.yaml > "$LOG_FILE"
最终目录结构
ansible-deploy-node-exporter-x86
|-ansible-deploy.sh
|-ansible-deploy.yaml
|-hosts
|-node-exporter
|-node-exporter.service
|-node_exporter
构建ansible的docker镜像
FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/iguazio/alpine:3.17
RUN apk add --no-cache openssh-client ansible bash sshpass shadow tzdata
# 将时区设置为 Asia/Shanghai
ENV TZ=Asia/Shanghai
# 创建本地时间的符号链接
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# 创建非 root 用户
ARG USERNAME=ansibleuser
ARG USER_UID=1000
ARG USER_GID=$USER_UID
# 添加组和用户
RUN groupadd --gid $USER_GID $USERNAME && \
useradd --uid $USER_UID --gid $USER_GID -m -s /bin/bash $USERNAME
# 设置环境变量以使用新的临时目录
ENV ANSIBLE_LOCAL_TEMP="/home/$USERNAME/.ansible/tmp"
ENV ANSIBLE_REMOTE_TEMP="/home/$USERNAME/.ansible/tmp_remote"
# 创建并设置权限
RUN mkdir -p $ANSIBLE_LOCAL_TEMP $ANSIBLE_REMOTE_TEMP && \
chown -R $USERNAME:$USERNAME /home/$USERNAME/.ansible
# 切换到新创建的用户
USER $USERNAME
CMD ["sh", "-c", "sleep 3600"]