Files
starRiverProperty/scripts/collect_elevator_runtime_evidence.sh
T
反编译工作区 8b15445328 feat: add service config templates and extraction script
Former-commit-id: 1de24b7eb79676d1aba9d799a58c5a753290cf52
2026-05-01 19:38:01 +08:00

150 lines
6.6 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
set -euo pipefail

# Read-only production evidence collector. In the Maven release package this
# script sits in the deployment root, next to start.sh and the properties
# files. It gathers:
#   - process arguments / environment / working directory
#   - local configuration files
#   - jcmd system properties
#   - key excerpts of the application log
#   - Consul health and KV snapshots
# and finally produces a tar.gz so the "config source -> Ribbon server list"
# problem can be analysed offline.
#
# Positional arguments (all optional; an empty value selects the default):
#   $1  application directory
#   $2  Consul address as host:port
#   $3  root directory for evidence output (default: <APP_DIR>/evidence)
#   $4  application name used in the Consul health / KV URLs
APP_DIR="${1:-/data/cwos/cw-elevator-application-V1.0.0.20211103}"
APP_NAME="${4:-elevator-app}"
CONSUL_ADDR="${2:-10.0.22.102:8500}"
OUT_ROOT="${3:-${APP_DIR}/evidence}"

# On-site JDK: fixed path on the production cwos-node, deliberately not
# resolved through PATH.
CWOS_JAVA_BIN="/data/cwos/java/bin"
JCMD_BIN="${CWOS_JAVA_BIN}/jcmd"
JAVA_BIN="${CWOS_JAVA_BIN}/java"
JAR_BIN="${CWOS_JAVA_BIN}/jar"
DATE_BIN="/bin/date"

# Every run writes into its own timestamped directory below OUT_ROOT.
timestamp="$("${DATE_BIN}" +%Y%m%d-%H%M%S)"
OUT_DIR="${OUT_ROOT}/elevator-evidence-${timestamp}"
mkdir -p "${OUT_DIR}"
# Write a progress message to stdout with the "[collect] " prefix.
# All arguments are joined through "$*" into one line.
log() {
  printf '%s\n' "[collect] $*"
}
# Echo the effective settings so they show up in the run output.
log "APP_DIR=${APP_DIR}"
log "CONSUL_ADDR=${CONSUL_ADDR}"
log "OUT_DIR=${OUT_DIR}"
log "JAVA_BIN=${JAVA_BIN} JAR_BIN=${JAR_BIN} JCMD_BIN=${JCMD_BIN}"

# Locate the target JVM: the first `ps -ef` line that mentions both "java" and
# "cw-elevator-application". Lines containing "awk" are skipped so the filter
# never matches itself.
# awk reads the ps output to the end instead of calling `exit` on the first
# hit: quitting early closes the pipe, ps may then die of SIGPIPE, and with
# `set -e` plus `pipefail` that non-zero pipeline status would abort the
# script even though a PID had been printed.
PID="$(ps -ef | awk '!seen && /java/ && /cw-elevator-application/ && !/awk/ { print $2; seen = 1 }')"
if [[ -z "${PID}" ]]; then
  echo "ERROR: 未找到 cw-elevator-application Java 进程" >&2
  exit 1
fi
log "PID=${PID}"
echo "${PID}" > "${OUT_DIR}/pid.txt"
# 1) Process and system baseline
ps -ef > "${OUT_DIR}/ps-ef.txt"
uname -a > "${OUT_DIR}/uname.txt"
"${DATE_BIN}" +%Y-%m-%dT%H:%M:%S%z > "${OUT_DIR}/collected-at.txt"

# Per-process details from /proc. Each capture is best-effort (`|| true`):
# a failed read must not abort the rest of the collection.
proc_dir="/proc/${PID}"
# cmdline and environ are NUL-separated; turn them into readable text.
tr '\0' ' ' < "${proc_dir}/cmdline" > "${OUT_DIR}/proc-cmdline.txt" || true
tr '\0' '\n' < "${proc_dir}/environ" > "${OUT_DIR}/proc-environ.txt" || true
# `ls -l` on the cwd symlink records the working directory it points to.
ls -l "${proc_dir}/cwd" > "${OUT_DIR}/proc-cwd.txt" || true
# 2) Snapshot of local configuration files (only the ones that exist)
for cfg in \
  bootstrap.properties \
  application.properties \
  application-access-control.properties \
  start.sh \
  stop.sh \
  cw-elevator-application.service; do
  # Skip files that are absent from this deployment.
  [[ -f "${APP_DIR}/${cfg}" ]] || continue
  # -a keeps mode, ownership and timestamps of the original file.
  cp -a "${APP_DIR}/${cfg}" "${OUT_DIR}/${cfg}"
done
# 3) JAR and structure snapshot
# Resolve the application JAR. The known release artifact under APP_DIR wins;
# otherwise use the argument that follows "-jar" on the JVM command line.
# (The first command-line token is the java launcher itself, not the JAR.)
JAR_PATH=""
if [[ -f "${APP_DIR}/cw-elevator-application-V1.0.0.20211103.jar" ]]; then
  JAR_PATH="${APP_DIR}/cw-elevator-application-V1.0.0.20211103.jar"
elif [[ -r "/proc/${PID}/cmdline" ]]; then
  # Arguments in /proc/<pid>/cmdline are NUL-separated; put one per line so
  # paths containing spaces stay intact. awk reads to the end (no early exit)
  # to avoid a SIGPIPE failure under pipefail.
  JAR_PATH="$(tr '\0' '\n' < "/proc/${PID}/cmdline" 2>/dev/null \
    | awk 'prev == "-jar" && !found { print; found = 1 } { prev = $0 }')" || JAR_PATH=""
  # A relative -jar argument is relative to the JVM's working directory,
  # not to the directory this script runs in.
  if [[ -n "${JAR_PATH}" && "${JAR_PATH}" != /* ]]; then
    proc_cwd="$(readlink "/proc/${PID}/cwd" 2>/dev/null || true)"
    if [[ -n "${proc_cwd}" ]]; then
      JAR_PATH="${proc_cwd}/${JAR_PATH}"
    fi
  fi
fi
# Record the resolved path even when it is empty or does not exist.
echo "${JAR_PATH}" > "${OUT_DIR}/jar-path.txt"

if [[ -f "${JAR_PATH}" ]]; then
  # Checksum and entry listing are best-effort.
  sha256sum "${JAR_PATH}" > "${OUT_DIR}/jar.sha256.txt" || true
  if [[ -x "${JAR_BIN}" ]]; then
    "${JAR_BIN}" tf "${JAR_PATH}" > "${OUT_DIR}/jar-tf.txt" || true
  else
    echo "jar not found or not executable: ${JAR_BIN}" > "${OUT_DIR}/jar-tf.txt"
  fi
  # Extract the properties files stored at the archive root; a missing entry
  # simply leaves an empty output file.
  # NOTE(review): a Spring Boot fat JAR usually keeps these under
  # BOOT-INF/classes/ - confirm against jar-tf.txt.
  unzip -p "${JAR_PATH}" application.properties > "${OUT_DIR}/jar-application.properties.txt" 2>/dev/null || true
  unzip -p "${JAR_PATH}" bootstrap.properties > "${OUT_DIR}/jar-bootstrap.properties.txt" 2>/dev/null || true
fi
# 4) jcmd system properties + attach diagnostics (does not modify application
#    configuration; helps troubleshoot AttachNotSupportedException)
# Everything printed inside the group, stderr included, goes into
# jcmd-attach-diagnose.txt.
{
  echo "=== current shell user ==="
  id 2>/dev/null || true

  echo "=== target java process ==="
  ps -o user=,group=,pid=,args= -p "${PID}" 2>/dev/null || true
  # Owner of /proc/<pid>; both values stay empty if stat fails.
  proc_owner="$(stat -c '%U' "/proc/${PID}" 2>/dev/null || true)"
  proc_owner_uid="$(stat -c '%u' "/proc/${PID}" 2>/dev/null || true)"
  echo "proc_owner=${proc_owner} uid=${proc_owner_uid}"

  echo "=== /tmp hsperfdata (HotSpot perf counter; attach 相关) ==="
  if [[ -n "${proc_owner}" && "${proc_owner}" != "unknown" ]]; then
    hsperf_dir="/tmp/hsperfdata_${proc_owner}"
    if [[ ! -d "${hsperf_dir}" ]]; then
      echo "no directory ${hsperf_dir}"
    else
      # Directory listing capped at 30 lines, then the entry for this PID.
      ls -la "${hsperf_dir}" 2>/dev/null | head -30 || true
      ls -la "${hsperf_dir}/${PID}" 2>/dev/null || echo "missing ${hsperf_dir}/${PID}"
    fi
  fi

  echo "=== cmdline tokens (attach / jdwp) ==="
  tr '\0' '\n' < "/proc/${PID}/cmdline" 2>/dev/null \
    | grep -E 'DisableAttach|Attach|jdwp|agentpath' \
    || echo "(none matched)"
} > "${OUT_DIR}/jcmd-attach-diagnose.txt" 2>&1

if [[ ! -x "${JCMD_BIN}" ]]; then
  echo "jcmd not found or not executable: ${JCMD_BIN}" > "${OUT_DIR}/jcmd-system-properties.txt"
else
  # jcmd output and errors share one file; a failed attach is tolerated.
  "${JCMD_BIN}" "${PID}" VM.system_properties > "${OUT_DIR}/jcmd-system-properties.txt" 2>&1 || true
  # When the output shows an attach failure, append troubleshooting hints.
  if grep -q 'AttachNotSupportedException\|Unable to open socket file' "${OUT_DIR}/jcmd-system-properties.txt" 2>/dev/null; then
    printf '%s\n' \
      "" \
      "HINT: jcmd attach 失败常见原因:" \
      " 1) 与 Java 进程不同用户执行 jcmd(请用与进程相同用户,例如: sudo -u <java_user> ${JCMD_BIN} ${PID} VM.system_properties" \
      " 2) /tmp/hsperfdata_<user>/<pid> 缺失或权限异常" \
      " 3) JVM 启动参数含 -XX:+DisableAttachMechanism(见 jcmd-attach-diagnose.txt 中 cmdline" \
      " 4) 进程非 HotSpot 或尚未完全初始化(极少见于长期运行的 Spring Boot" \
      >> "${OUT_DIR}/jcmd-system-properties.txt"
  fi
fi
# 4b) Java version (supporting evidence for which JDK is installed on site)
if [[ ! -x "${JAVA_BIN}" ]]; then
  echo "java not found or not executable: ${JAVA_BIN}" > "${OUT_DIR}/java-version.txt"
else
  # stderr is captured together with stdout; failure is tolerated.
  "${JAVA_BIN}" -version > "${OUT_DIR}/java-version.txt" 2>&1 || true
fi
# 5) Key lines of the application log
LOG_FILE="${APP_DIR}/logs/elevator-app.log"
if [[ -f "${LOG_FILE}" ]]; then
  # Keep a complete copy of the log ...
  cp -a "${LOG_FILE}" "${OUT_DIR}/elevator-app.log.full"
  # ... plus only the lines matching the config-probe, Ribbon load-balancer
  # and Consul registration markers (awk prints matching lines by default).
  awk '/CONFIG SOURCE PROBE START|CONFIG SOURCE PROBE END|probe key=|ConfigurationBasedServerList|Load balancer does not have available server|DynamicServerListLoadBalancer|ConsulServiceRegistry|Registering service with consul/' \
    "${LOG_FILE}" > "${OUT_DIR}/elevator-app.log.keylines.txt"
fi
# 6) Consul snapshot
# The shared curl invocation lives in an array so each option stays a separate
# word without depending on unquoted word splitting of a string.
# -sS: no progress meter, but errors are still shown; 8-second cap per request.
curl_cmd=(curl -sS --max-time 8)

# Passing-only health entries: the application itself first, then a fixed
# list of other services. Every request is best-effort.
for svc in "${APP_NAME}" cwos-portal ninca-common ninca-common-component-organization ninca-crk-std cloudwalk-device-thirdparty; do
  "${curl_cmd[@]}" "http://${CONSUL_ADDR}/v1/health/service/${svc}?passing=true" \
    > "${OUT_DIR}/consul-health-${svc}.json" || true
done

# Raw KV payloads under config/<key>/data for the application and for its
# ",access-control" variant.
for kv_key in "${APP_NAME}" "${APP_NAME},access-control"; do
  "${curl_cmd[@]}" "http://${CONSUL_ADDR}/v1/kv/config/${kv_key}/data?raw" \
    > "${OUT_DIR}/consul-kv-${kv_key}.properties" || true
done
# 7) On-site reachability snapshot for a fixed set of known host names
known_hosts=(
  0837a70b5fab47569391828f5feb2561
  371bfca4972c43d2aefcf302d0a4a277
  44700995ee904679a7ad5afddcf93bb5
)
for target in "${known_hosts[@]}"; do
  # Name resolution result (or the error) for this host.
  getent hosts "${target}" > "${OUT_DIR}/getent-${target}.txt" 2>&1 || true
  # HEAD request against port 8089, 5-second cap; stderr is kept in the file.
  curl -I --max-time 5 "http://${target}:8089/" > "${OUT_DIR}/curl-head-${target}-8089.txt" 2>&1 || true
done
# Pack the evidence directory into <OUT_DIR>.tar.gz beside it. tar changes to
# the parent directory first, so the archive holds one top-level folder named
# after the evidence directory.
ARCHIVE="${OUT_DIR}.tar.gz"
tar -czf "${ARCHIVE}" -C "${OUT_DIR%/*}" "${OUT_DIR##*/}"
log "DONE archive=${ARCHIVE}"
# Final stdout line is the bare archive path (presumably for callers that
# capture it - confirm against the invoking tooling).
echo "${ARCHIVE}"