-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnas_monitor.sh
More file actions
executable file
·111 lines (98 loc) · 3.02 KB
/
nas_monitor.sh
File metadata and controls
executable file
·111 lines (98 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/bin/bash
# Configuration parameters
MOUNT_POINT="/nas"                            # directory the NAS share is mounted on
TEST_FILE="${MOUNT_POINT}/.nas_healthcheck"   # file touched by the health check
LOG_FILE="/var/log/nas_monitor.log"           # where log() appends its lines
LOCK_FILE="/var/lock/nas_monitor.lock"        # lock file guarding against parallel runs
NAS_IP="172.23.148.200"                       # address probed by ping and rpcinfo

# Make sure the log file exists; when it has to be created, set its mode to 640.
if [[ ! -f "$LOG_FILE" ]]; then
    touch "$LOG_FILE"
    chmod 640 "$LOG_FILE"
fi
# Append one timestamped line to the log file.
# Globals:
#   LOG_FILE (read) - file the line is appended to
# Arguments:
#   $1 - message text
log() {
    local stamp
    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    printf '%s\n' "[${stamp}] $1" >> "${LOG_FILE}"
}
# Use flock to keep the script from running more than once at a time:
# open the lock file on fd 9 and try to lock it without blocking.
exec 9>"${LOCK_FILE}"
flock -n 9 || {
    # The lock could not be taken - log it and leave.
    log "flock failed: Another instance is already running - exiting"
    exit 1
}
# Check whether the NAS responds, running three probes in order and
# stopping at the first one that fails.
# Globals:
#   NAS_IP    (read)    - address used for ping and rpcinfo
#   TEST_FILE (read)    - path touched to exercise a file operation
#   msg       (written) - short failure reason; callers read it for logging
# Returns:
#   0 when every probe passes, 1 as soon as one fails (msg is set).
check_nas_inner() {
    local touch_rc=0

    # 1. Basic network connectivity
    if ! ping -c1 -W2 "${NAS_IP}" >/dev/null 2>&1; then
        msg="network unreachable"
        return 1
    fi

    # 2. NFS service status
    if ! timeout 5s rpcinfo -t "${NAS_IP}" nfs >/dev/null 2>&1; then
        msg="service not responding"
        return 1
    fi

    # 3. File operation test (with timeout).
    # If the share is not mounted, MOUNT_POINT is a local directory and the
    # touch succeeds against the local filesystem.
    timeout 5s touch "${TEST_FILE}" >/dev/null 2>&1 || touch_rc=$?
    if (( touch_rc == 124 )); then
        # 124 is the status timeout returns when the command ran out of time.
        msg="file operation timed out"
        return 1
    elif (( touch_rc != 0 )); then
        # touch itself failed; report that instead of calling it a timeout.
        msg="file operation failed (exit ${touch_rc})"
        return 1
    fi

    return 0
}
# Run check_nas_inner with retries: return immediately on success, otherwise
# retry up to 10 times, 15 seconds apart (both values can be overridden).
# Globals:
#   msg (read) - failure reason left behind by check_nas_inner
# Arguments:
#   $1 - maximum number of attempts (optional, default 10)
#   $2 - seconds to wait between attempts (optional, default 15)
# Returns:
#   0 as soon as one attempt succeeds, 1 when every attempt failed.
check_nas() {
    local max_attempts=${1:-10}
    local interval=${2:-15}
    local attempt

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        if check_nas_inner; then
            # Only worth a log line when earlier attempts had failed.
            if (( attempt > 1 )); then
                log "NAS check succeeded on attempt ${attempt}"
            fi
            return 0
        fi
        log "NAS check failed: ${msg} (attempt ${attempt}/${max_attempts})"
        # Nothing is re-checked after the final attempt, so do not wait then.
        if (( attempt < max_attempts )); then
            sleep "${interval}"
        fi
    done
    return 1
}
# Check the state of the mount point.
# Globals:
#   MOUNT_POINT (read) - directory passed to mountpoint
# Returns:
#   0 when mountpoint reports success or the 5 s probe timed out,
#   otherwise the non-zero status of the probe.
is_mounted() {
    local ret=0

    timeout 5s mountpoint -q "${MOUNT_POINT}" || ret=$?
    # 124 is timeout; treat it as mounted.
    if (( ret == 124 )); then
        ret=0
    fi
    return "${ret}"
}
# Main logic: reconcile the mount state with NAS availability.
if ! is_mounted; then
    # Not mounted - probe the NAS once, without retries.
    if ! check_nas_inner; then
        # Not mounted and NAS unavailable - no action.
        log "NAS check failed: ${msg}. NAS unavailable and not mounted - no action"
    else
        # Not mounted but NAS available - try to mount.
        log "NAS available but not mounted - mounting..."
        mount -a
        if ! is_mounted; then
            log "Mount failed!"
        else
            log "Mount successful"
        fi
    fi
elif ! check_nas; then
    # Mounted but NAS unavailable (after retries) - try to unmount.
    # (Mounted with the NAS available needs no action at all.)
    log "NAS unavailable but mounted - unmounting..."
    # Try a normal unmount first.
    timeout 10s umount "${MOUNT_POINT}" 2>/dev/null
    if ! is_mounted; then
        log "Unmounted successfully"
    else
        # Normal unmount failed - fall back to a lazy unmount.
        log "Normal umount failed - forcing lazy unmount"
        umount -l "${MOUNT_POINT}"
        # Check the final state.
        if ! is_mounted; then
            log "Unmounted successfully (lazy)"
        else
            log "Unmount failed completely!"
        fi
    fi
fi