#!/usr/bin/env bash
set -euo pipefail

# Read-only production evidence collector. In the Maven release package this
# script sits in the deployment root, next to start.sh and the properties
# files. It gathers:
#   - process arguments / environment / working directory
#   - local configuration files
#   - jcmd system properties
#   - key excerpts of the application log
#   - Consul health and KV snapshots
# and finally emits a tar.gz so the "config source -> Ribbon server list"
# problem can be analysed offline.
#
# Usage: <script> [APP_DIR] [CONSUL_ADDR] [OUT_ROOT] [APP_NAME]
# Output: progress lines prefixed with "[collect]" on stdout; the LAST stdout
#         line is the path of the generated archive.
# Exit:   1 when no matching Java process is found.
#
# NOTE(review): /proc/<pid>/environ is copied verbatim and may contain
# credentials; treat the resulting archive as sensitive.

APP_DIR="${1:-/data/cwos/cw-elevator-application-V1.0.0.20211103}"
CONSUL_ADDR="${2:-10.0.22.102:8500}"
OUT_ROOT="${3:-${APP_DIR}/evidence}"
APP_NAME="${4:-elevator-app}"

# On-site JDK (fixed path on production cwos-node; PATH is not relied upon).
CWOS_JAVA_BIN="/data/cwos/java/bin"
JAVA_BIN="${CWOS_JAVA_BIN}/java"
JAR_BIN="${CWOS_JAVA_BIN}/jar"
JCMD_BIN="${CWOS_JAVA_BIN}/jcmd"
DATE_BIN="/bin/date"

timestamp="$("${DATE_BIN}" +%Y%m%d-%H%M%S)"
OUT_DIR="${OUT_ROOT}/elevator-evidence-${timestamp}"
mkdir -p "${OUT_DIR}"

# Print a progress message with the "[collect]" prefix.
log() {
  echo "[collect] $*"
}

# Best-effort extraction of a properties file from a JAR into $3.
# Tries the JAR root first, then the Spring Boot fat-JAR location
# (BOOT-INF/classes/). Always leaves $3 in place (possibly empty) and never
# fails the script.
#   $1 - jar path, $2 - entry name, $3 - output file
extract_jar_entry() {
  local jar=$1 entry=$2 out=$3 candidate
  for candidate in "${entry}" "BOOT-INF/classes/${entry}"; do
    if unzip -p "${jar}" "${candidate}" > "${out}" 2>/dev/null; then
      return 0
    fi
  done
  return 0
}

log "APP_DIR=${APP_DIR}"
log "CONSUL_ADDR=${CONSUL_ADDR}"
log "OUT_DIR=${OUT_DIR}"
log "JAVA_BIN=${JAVA_BIN} JAR_BIN=${JAR_BIN} JCMD_BIN=${JCMD_BIN}"

# Locate the target JVM. awk deliberately consumes ALL of ps' output instead
# of exiting on the first hit: an early exit can kill ps with SIGPIPE, and
# with `pipefail` + `set -e` that would abort the script. This script and its
# children (self / PPID == self) are excluded, since their command line may
# contain APP_DIR.
PID="$(
  ps -ef | awk -v self="$$" '
    /java/ && /cw-elevator-application/ && !/awk/ \
      && $2 != self && $3 != self && !found {
      print $2
      found = 1
    }
  '
)"
if [[ -z "${PID}" ]]; then
  echo "ERROR: 未找到 cw-elevator-application Java 进程" >&2
  exit 1
fi
log "PID=${PID}"
echo "${PID}" > "${OUT_DIR}/pid.txt"

# 1) Process and basic system information.
ps -ef > "${OUT_DIR}/ps-ef.txt"
uname -a > "${OUT_DIR}/uname.txt"
"${DATE_BIN}" +%Y-%m-%dT%H:%M:%S%z > "${OUT_DIR}/collected-at.txt"
# /proc reads may fail (permissions, process exit); keep going regardless.
tr '\0' ' ' < "/proc/${PID}/cmdline" > "${OUT_DIR}/proc-cmdline.txt" || true
tr '\0' '\n' < "/proc/${PID}/environ" > "${OUT_DIR}/proc-environ.txt" || true
ls -l "/proc/${PID}/cwd" > "${OUT_DIR}/proc-cwd.txt" || true

# 2) Snapshot of local configuration files (only those that exist).
for f in bootstrap.properties application.properties \
  application-access-control.properties start.sh stop.sh \
  cw-elevator-application.service; do
  if [[ -f "${APP_DIR}/${f}" ]]; then
    cp -a "${APP_DIR}/${f}" "${OUT_DIR}/${f}"
  fi
done

# 3) JAR and structure snapshot.
# Fallback JAR path: the argument following "-jar" on the JVM command line
# (the first cmdline token is the java executable, not the JAR). The
# NUL-separated /proc file is read directly so paths with spaces survive.
JAR_PATH=""
prev_arg=""
while IFS= read -r -d '' arg; do
  if [[ "${prev_arg}" == "-jar" ]]; then
    JAR_PATH="${arg}"
    break
  fi
  prev_arg="${arg}"
done < "/proc/${PID}/cmdline" || true
# A relative -jar argument is relative to the JVM's working directory.
if [[ -n "${JAR_PATH}" && "${JAR_PATH}" != /* ]]; then
  proc_cwd="$(readlink "/proc/${PID}/cwd" 2>/dev/null || true)"
  JAR_PATH="${proc_cwd:-${APP_DIR}}/${JAR_PATH}"
fi
# The well-known release JAR inside APP_DIR takes precedence when present.
if [[ -f "${APP_DIR}/cw-elevator-application-V1.0.0.20211103.jar" ]]; then
  JAR_PATH="${APP_DIR}/cw-elevator-application-V1.0.0.20211103.jar"
fi
echo "${JAR_PATH}" > "${OUT_DIR}/jar-path.txt"
if [[ -f "${JAR_PATH}" ]]; then
  sha256sum "${JAR_PATH}" > "${OUT_DIR}/jar.sha256.txt" || true
  if [[ -x "${JAR_BIN}" ]]; then
    "${JAR_BIN}" tf "${JAR_PATH}" > "${OUT_DIR}/jar-tf.txt" || true
  else
    echo "jar not found or not executable: ${JAR_BIN}" > "${OUT_DIR}/jar-tf.txt"
  fi
  extract_jar_entry "${JAR_PATH}" application.properties \
    "${OUT_DIR}/jar-application.properties.txt"
  extract_jar_entry "${JAR_PATH}" bootstrap.properties \
    "${OUT_DIR}/jar-bootstrap.properties.txt"
fi

# 4) jcmd system properties + attach diagnostics (application config is not
#    modified; helps troubleshoot AttachNotSupportedException).
PROC_USER="$(stat -c '%U' "/proc/${PID}" 2>/dev/null || echo "")"
PROC_UID="$(stat -c '%u' "/proc/${PID}" 2>/dev/null || echo "")"
{
  echo "=== current shell user ==="
  id 2>/dev/null || true
  echo "=== target java process ==="
  ps -o user=,group=,pid=,args= -p "${PID}" 2>/dev/null || true
  echo "proc_owner=${PROC_USER} uid=${PROC_UID}"
  echo "=== /tmp hsperfdata (HotSpot perf counter; attach 相关) ==="
  # GNU stat reports an unresolvable owner as "UNKNOWN"; accept either case.
  if [[ -n "${PROC_USER}" && "${PROC_USER}" != "unknown" \
    && "${PROC_USER}" != "UNKNOWN" ]]; then
    HS="/tmp/hsperfdata_${PROC_USER}"
    if [[ -d "${HS}" ]]; then
      ls -la "${HS}" 2>/dev/null | head -30 || true
      ls -la "${HS}/${PID}" 2>/dev/null || echo "missing ${HS}/${PID}"
    else
      echo "no directory ${HS}"
    fi
  fi
  echo "=== cmdline tokens (attach / jdwp) ==="
  tr '\0' '\n' < "/proc/${PID}/cmdline" 2>/dev/null \
    | grep -E 'DisableAttach|Attach|jdwp|agentpath' \
    || echo "(none matched)"
} > "${OUT_DIR}/jcmd-attach-diagnose.txt" 2>&1

if [[ -x "${JCMD_BIN}" ]]; then
  "${JCMD_BIN}" "${PID}" VM.system_properties \
    > "${OUT_DIR}/jcmd-system-properties.txt" 2>&1 || true
  if grep -q 'AttachNotSupportedException\|Unable to open socket file' \
    "${OUT_DIR}/jcmd-system-properties.txt" 2>/dev/null; then
    # Name the actual process owner in the hint; fall back to a placeholder.
    hint_user="${PROC_USER:-<user>}"
    {
      echo ""
      echo "HINT: jcmd attach 失败常见原因:"
      echo " 1) 与 Java 进程不同用户执行 jcmd(请用与进程相同用户,例如: sudo -u ${hint_user} ${JCMD_BIN} ${PID} VM.system_properties)"
      echo " 2) /tmp/hsperfdata_${hint_user}/ 缺失或权限异常"
      echo " 3) JVM 启动参数含 -XX:+DisableAttachMechanism(见 jcmd-attach-diagnose.txt 中 cmdline)"
      echo " 4) 进程非 HotSpot 或尚未完全初始化(极少见于长期运行的 Spring Boot)"
    } >> "${OUT_DIR}/jcmd-system-properties.txt"
  fi
else
  echo "jcmd not found or not executable: ${JCMD_BIN}" \
    > "${OUT_DIR}/jcmd-system-properties.txt"
fi

# 4b) Java version (corroborates which JDK is installed on site).
if [[ -x "${JAVA_BIN}" ]]; then
  "${JAVA_BIN}" -version > "${OUT_DIR}/java-version.txt" 2>&1 || true
else
  echo "java not found or not executable: ${JAVA_BIN}" \
    > "${OUT_DIR}/java-version.txt"
fi

# 5) Application log: full copy plus the lines relevant to config-source
#    probing, Ribbon server lists and Consul registration.
LOG_FILE="${APP_DIR}/logs/elevator-app.log"
if [[ -f "${LOG_FILE}" ]]; then
  cp -a "${LOG_FILE}" "${OUT_DIR}/elevator-app.log.full"
  awk '
    /CONFIG SOURCE PROBE START|CONFIG SOURCE PROBE END|probe key=|ConfigurationBasedServerList|Load balancer does not have available server|DynamicServerListLoadBalancer|ConsulServiceRegistry|Registering service with consul/ { print }
  ' "${LOG_FILE}" > "${OUT_DIR}/elevator-app.log.keylines.txt"
fi

# 6) Consul snapshots. The command is an array so no word-splitting of a
#    string is needed; every request is best-effort.
CURL=(curl -sS --max-time 8)
"${CURL[@]}" "http://${CONSUL_ADDR}/v1/health/service/${APP_NAME}?passing=true" \
  > "${OUT_DIR}/consul-health-${APP_NAME}.json" || true
for svc in cwos-portal ninca-common ninca-common-component-organization \
  ninca-crk-std cloudwalk-device-thirdparty; do
  "${CURL[@]}" "http://${CONSUL_ADDR}/v1/health/service/${svc}?passing=true" \
    > "${OUT_DIR}/consul-health-${svc}.json" || true
done
"${CURL[@]}" "http://${CONSUL_ADDR}/v1/kv/config/${APP_NAME}/data?raw" \
  > "${OUT_DIR}/consul-kv-${APP_NAME}.properties" || true
"${CURL[@]}" "http://${CONSUL_ADDR}/v1/kv/config/${APP_NAME},access-control/data?raw" \
  > "${OUT_DIR}/consul-kv-${APP_NAME},access-control.properties" || true

# 7) Reachability snapshot for the known host names.
for host in 0837a70b5fab47569391828f5feb2561 \
  371bfca4972c43d2aefcf302d0a4a277 \
  44700995ee904679a7ad5afddcf93bb5; do
  getent hosts "${host}" > "${OUT_DIR}/getent-${host}.txt" 2>&1 || true
  curl -I --max-time 5 "http://${host}:8089/" \
    > "${OUT_DIR}/curl-head-${host}-8089.txt" 2>&1 || true
done

# Pack the evidence directory next to itself and report the archive path.
ARCHIVE="${OUT_DIR}.tar.gz"
tar -czf "${ARCHIVE}" -C "$(dirname "${OUT_DIR}")" "$(basename "${OUT_DIR}")"
log "DONE archive=${ARCHIVE}"
echo "${ARCHIVE}"