fix: relocate cwos-portal decompiled output to correct path; remove nested directory

Former-commit-id: dc30d42a8c55ed8b2382a41dc2434233fbed9930
This commit is contained in:
反编译工作区
2026-04-29 12:09:48 +08:00
parent ea8e492076
commit e8672a3c7b
1759 changed files with 547735 additions and 280 deletions
@@ -0,0 +1,196 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
生成“贴合源码”的反编译代码树:
- 以 V1 CFR 反编译目录为骨架;
- 若同路径在 V2 Maven 源码存在,则用 V2 源码覆盖该文件;
- 若 V2 不存在,则保留 V1 CFR 文件;
- 同时补充 V2 独有文件(V1 中无)。
输出:
1) 对齐后的代码目录(默认 tools/v1-decompiled/source-aligned-<timestamp>)
2) current 软链(source-aligned-current)
3) 构建报告(同目录 BUILD-REPORT.md / BUILD-REPORT.json)
"""
from __future__ import annotations
import argparse
import json
import os
import shutil
from datetime import datetime, timezone
from pathlib import Path
# Absolute path of this script.
SCR = Path(__file__).resolve()
# Maven project root: taken from the MV_ROOT environment variable when set,
# otherwise the 4th ancestor directory of this script.
MV_ROOT = Path(os.environ.get("MV_ROOT", SCR.parents[3])).resolve()
# Default location of the V1 CFR-decompiled tree used as the skeleton.
DEFAULT_V1 = (MV_ROOT / "tools" / "v1-decompiled" / "cfr-from-cw-lib-current").resolve()
# (Maven module directory name, V1 decompiled directory name) pairs, in the
# order they are unpacked by build().
PAIR = [
    ("cw-elevator-application-common", "cw-elevator-application-common-1.0-SNAPSHOT"),
    ("cw-elevator-application-data", "cw-elevator-application-data-1.0-SNAPSHOT"),
    ("cw-elevator-application-service", "cw-elevator-application-service-1.0-SNAPSHOT"),
    ("cw-elevator-application-web", "cw-elevator-application-web-1.0-SNAPSHOT"),
]
def copy_file(src: Path, dst: Path) -> None:
    """Copy *src* to *dst* (data and metadata), creating missing parent directories."""
    os.makedirs(dst.parent, exist_ok=True)
    shutil.copy2(src, dst)
def rel_java_map(root: Path) -> dict[str, Path]:
    """
    Index every ``*.java`` file below *root* by its root-relative path.

    Keys use ``/`` as separator (backslashes are replaced); values are the
    paths yielded by ``root.rglob``.
    """
    index: dict[str, Path] = {}
    for java_path in root.rglob("*.java"):
        relative = str(java_path.relative_to(root))
        index[relative.replace("\\", "/")] = java_path
    return index
def build(v1_root: Path, out_root: Path) -> dict:
    """
    Assemble the source-aligned tree under *out_root*.

    For every module pair in ``PAIR`` the relative ``.java`` paths of the V1
    decompiled directory and of the Maven ``src/main/java`` directory are
    merged. A path present on both sides is copied from Maven, a V1-only path
    is copied from V1, and a Maven-only path is added from Maven.

    Returns a dict with ``stats`` (global and per-module counters) and
    ``details`` (one record per output file).
    """
    totals: dict = dict.fromkeys(
        ("v1_only_kept", "v2_replaced", "v2_only_added", "total_output_java"), 0
    )
    totals["modules"] = {}
    entries: list[dict] = []

    for maven_name, v1_name in PAIR:
        v1_dir = v1_root / v1_name
        maven_dir = MV_ROOT / maven_name / "src" / "main" / "java"
        target_dir = out_root / v1_name

        # A missing directory on either side simply contributes no files.
        decompiled = rel_java_map(v1_dir) if v1_dir.is_dir() else {}
        sources = rel_java_map(maven_dir) if maven_dir.is_dir() else {}
        per_module = dict.fromkeys(
            ("v1_only_kept", "v2_replaced", "v2_only_added", "output_java"), 0
        )

        for rel in sorted(decompiled.keys() | sources.keys()):
            from_v1 = decompiled.get(rel)
            from_v2 = sources.get(rel)
            destination = target_dir / rel

            # rel comes from the union, so at least one side is present.
            if from_v2 is None:
                kind, chosen = "V1_ONLY_KEPT", from_v1
            elif from_v1 is None:
                kind, chosen = "V2_ONLY_ADDED", from_v2
            else:
                kind, chosen = "V2_REPLACED", from_v2
            copy_file(chosen, destination)

            # Counter keys are the lower-cased source kinds.
            counter_key = kind.lower()
            totals[counter_key] += 1
            per_module[counter_key] += 1
            totals["total_output_java"] += 1
            per_module["output_java"] += 1

            entries.append(
                {
                    "module_v1_dir": v1_name,
                    "module_maven": maven_name,
                    "relative_path": rel,
                    "source_kind": kind,
                    "v1_file": str(from_v1) if from_v1 else None,
                    "v2_file": str(from_v2) if from_v2 else None,
                    "output_file": str(destination),
                }
            )
        totals["modules"][maven_name] = per_module

    return {"stats": totals, "details": entries}
def write_report(out_root: Path, payload: dict, v1_root: Path) -> None:
    """
    Write ``BUILD-REPORT.json`` and ``BUILD-REPORT.md`` into *out_root*.

    The JSON file is the full *payload*; the Markdown file summarizes
    ``payload["stats"]`` globally and per module.
    """
    (out_root / "BUILD-REPORT.json").write_text(
        json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    stats = payload["stats"]
    generated = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    header = [
        "# Source-Aligned Decompiled Build Report",
        "",
        f"- UTC: `{generated}`",
        f"- V1 base: `{v1_root}`",
        f"- Maven root: `{MV_ROOT}`",
        "",
    ]
    summary = ["## Summary", "", "| metric | count |", "|---|---:|"]
    for metric in ("v2_replaced", "v1_only_kept", "v2_only_added", "total_output_java"):
        summary.append(f"| {metric} | {stats[metric]} |")
    summary.append("")

    module_table = [
        "## Module stats",
        "",
        "| module | v2_replaced | v1_only_kept | v2_only_added | output_java |",
        "|---|---:|---:|---:|---:|",
    ]
    module_table.extend(
        f"| {name} | {row['v2_replaced']} | {row['v1_only_kept']} | {row['v2_only_added']} | {row['output_java']} |"
        for name, row in stats["modules"].items()
    )

    notes = [
        "",
        "## Notes",
        "",
        "- 该目录可视作“源码优先的反编译校正版”:同路径优先采用 Maven 源码。",
        "- 若你要严格保留历史 jar 语义,请结合 `details` 过滤 `V2_REPLACED`/`V2_ONLY_ADDED`。",
    ]

    document = "\n".join(header + summary + module_table + notes)
    (out_root / "BUILD-REPORT.md").write_text(document, encoding="utf-8")
def update_current_link(base_dir: Path, target: Path, link_name: str) -> Path:
    """
    (Re)create the symlink ``base_dir / link_name`` so that it points at *target*.

    An existing link (including a dangling one) or file of that name is
    removed first. When *target* lives directly inside *base_dir* the link is
    relative (just the directory name); otherwise it stores the absolute
    target path. Previously the link always used ``target.name``, which left
    it dangling whenever the output directory was outside *base_dir*.

    Returns the path of the link.
    """
    link = base_dir / link_name
    # is_symlink() catches dangling links, for which exists() is False.
    if link.is_symlink() or link.exists():
        link.unlink()

    if target.parent.resolve() == base_dir.resolve():
        destination = Path(target.name)
    else:
        destination = target.resolve()
    link.symlink_to(destination)
    return link
def main() -> int:
    """
    Command-line entry point: build the aligned tree, write the reports and
    (unless ``--no-current-link`` is given) refresh the
    ``source-aligned-current`` link. Returns the process exit code.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--v1-root", default=str(DEFAULT_V1), help="V1 CFR 根目录")
    parser.add_argument("--out-dir", default="", help="输出目录;默认自动生成时间戳目录")
    parser.add_argument(
        "--no-current-link",
        action="store_true",
        help="不更新 source-aligned-current 软链",
    )
    options = parser.parse_args()

    v1_root = Path(options.v1_root).resolve()
    if not v1_root.is_dir():
        raise SystemExit(f"V1 root missing: {v1_root}")

    base_dir = (MV_ROOT / "tools" / "v1-decompiled").resolve()
    base_dir.mkdir(parents=True, exist_ok=True)

    # Explicit --out-dir wins; otherwise use a local-time stamped directory.
    out_root = (
        Path(options.out_dir).resolve()
        if options.out_dir
        else base_dir / f"source-aligned-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    )
    out_root.mkdir(parents=True, exist_ok=True)

    payload = build(v1_root=v1_root, out_root=out_root)
    write_report(out_root=out_root, payload=payload, v1_root=v1_root)
    if not options.no_current_link:
        update_current_link(base_dir=base_dir, target=out_root, link_name="source-aligned-current")

    stats = payload["stats"]
    print(f"OK: {out_root}")
    print(
        f"SUMMARY: replaced={stats['v2_replaced']}"
        f" kept_v1={stats['v1_only_kept']}"
        f" added_v2={stats['v2_only_added']}"
        f" total={stats['total_output_java']}"
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,118 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
javap -c 文本在「行级归一」之后的二次归一:折叠布局等价序列、依赖升级导致的等价 API 形态。
供 bytecode_parity_rootcause / export_impl_diff_evidence 共用。
"""
from __future__ import annotations
import re
def _collapse_double_goto(result: str) -> str:
    """Collapse every run of two or more consecutive ``goto @`` lines into one."""
    # One pass over whole runs gives the same result as repeatedly merging
    # adjacent pairs until nothing changes.
    return re.sub(r"(?m)(?:^goto @\n){2,}", "goto @\n", result)
def norm_code_post(result: str) -> str:
    """
    Second normalization pass over line-normalized ``javap -c`` text.

    Input: the string assembled from the lines processed by ``norm_code``
    (a trailing newline is not required). Returns the text with
    layout-equivalent instruction sequences folded into canonical forms.
    """
    # --- 1) condition immediately followed by `goto @` (trampoline):
    #        fold into the single inverted condition ---
    inverse_of = {
        "ifnonnull": "ifnull",
        "if_icmpne": "if_icmpeq",
        "ifne": "ifeq",
        "if_icmpeq": "if_icmpne",
    }
    for opcode, inverse in inverse_of.items():
        result = re.sub(rf"(?m)^{opcode} @\n^goto @\n", f"{inverse} @\n", result)
    result = _collapse_double_goto(result)

    # --- 2) drop a checkcast line that exactly repeats the previous line ---
    deduped: list[str] = []
    for line in result.splitlines():
        repeats_previous = bool(deduped) and line == deduped[-1] and "checkcast" in line
        if not repeats_previous:
            deduped.append(line)
    result = "\n".join(deduped)

    # --- 3) checkcast on a null argument right before String.join ---
    null_cast = r"^aconst_null\n" r"^checkcast # // class java/lang/String\n"
    join_call = r"^(invokestatic # // Method java/lang/String\.join:\(\[Ljava/lang/Object;Ljava/lang/String;\)Ljava/lang/String;)\s*$"
    result = re.sub(r"(?m)" + null_cast + join_call, r"aconst_null\n\1", result)
    result = re.sub(
        r"(?m)^aload _\n" + null_cast + join_call,
        r"aload _\naconst_null\n\1",
        result,
    )

    # --- 3b) iconst_0 initializing two locals: the dup/istore form and the
    #         istore/iload form map to the same placeholder ---
    zero_init_forms = (
        r"(?m)^iconst_0\n^dup\n^istore _\n^dup\n^istore _\n",
        r"(?m)^iconst_0\n^istore _\n^iload _\n^istore _\n^iload _\n",
    )
    for form in zero_init_forms:
        result = re.sub(form, "iconst_0\n__local01_from_dup__\n", result)

    # --- 4) Apache POI: old int/short API and new CellType / IndexedColors
    #        API map to the same placeholder token ---
    poi_rewrites = [
        (
            r"(?m)^iconst_1\n^invokeinterface # // InterfaceMethod org/apache/poi/ss/usermodel/Cell\.setCellType:\(I\)V\s*$",
            "__poi_Cell_setCellType_STRING__",
        ),
        (
            r"(?m)^getstatic # // Field org/apache/poi/ss/usermodel/CellType\.STRING:.+\n"
            r"^invokeinterface # // InterfaceMethod org/apache/poi/ss/usermodel/Cell\.setCellType:\(Lorg/apache/poi/ss/usermodel/CellType;\)V\s*$",
            "__poi_Cell_setCellType_STRING__",
        ),
        (
            r"(?m)^iconst_1\n^invokevirtual # // Method org/apache/poi/hssf/usermodel/HSSFCell\.setCellType:\(I\)V\s*$",
            "__poi_HSSFCell_setCellType_STRING__",
        ),
        (
            r"(?m)^getstatic # // Field org/apache/poi/ss/usermodel/CellType\.STRING:.+\n"
            r"^invokevirtual # // Method org/apache/poi/hssf/usermodel/HSSFCell\.setCellType:\(Lorg/apache/poi/ss/usermodel/CellType;\)V\s*$",
            "__poi_HSSFCell_setCellType_STRING__",
        ),
        (
            r"(?m)^sipush 700\n^invokevirtual # // Method org/apache/poi/hssf/usermodel/HSSFFont\.setBoldweight:\(S\)V\s*$",
            "__poi_HSSFFont_bold__",
        ),
        (
            r"(?m)^iconst_1\n^invokevirtual # // Method org/apache/poi/hssf/usermodel/HSSFFont\.setBold:\(Z\)V\s*$",
            "__poi_HSSFFont_bold__",
        ),
        (
            r"(?m)^sipush 32767\n^invokevirtual # // Method org/apache/poi/hssf/usermodel/HSSFFont\.setColor:\(S\)V\s*$",
            "__poi_HSSFFont_setColor_RED__",
        ),
        (
            r"(?m)^getstatic # // Field org/apache/poi/ss/usermodel/IndexedColors\.RED:.+\n"
            r"^invokevirtual # // Method org/apache/poi/ss/usermodel/IndexedColors\.getIndex:\(\)S\n"
            r"^invokevirtual # // Method org/apache/poi/hssf/usermodel/HSSFFont\.setColor:\(S\)V\s*$",
            "__poi_HSSFFont_setColor_RED__",
        ),
    ]
    for pattern, placeholder in poi_rewrites:
        result = re.sub(pattern, placeholder, result)

    # Earlier rewrites may have made `goto @` lines adjacent again.
    return _collapse_double_goto(result)
@@ -0,0 +1,331 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
V1 原始 jar vs V2 编译 jar 的字节码多维对比(根因归类)
维度:
1) 类存在性:ONLY_V1 / ONLY_V2
2) API 签名:javap -p -s 归一化后 hash
3) 方法实现:javap -c -p 归一化后 hash
说明:
- 仅比较 cn/cloudwalk/elevator/** 下 class
- 忽略常量池索引号、栈帧偏移等不稳定文本
- 行级归一见 `norm_code`,二次归一见同目录 `bytecode_norm_post.norm_code_post`
- 输出 Markdown + JSON,便于复盘与持续回归
"""
from __future__ import annotations
import argparse
import hashlib
import json
import re
import subprocess
from collections import Counter, defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from bytecode_norm_post import norm_code_post
# Repository root: the 4th ancestor directory of this script.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Directory holding the original V1 jars, a sibling of the repository root.
V1_ROOT = REPO_ROOT.parent / "cw-elevator-application-V1.0.0.20211103" / "cw_lib"
# (module directory name, jar artifact base name) pairs; main() appends
# "-1.0-SNAPSHOT.jar" / "-2.0-SNAPSHOT.jar" to the artifact name.
PAIR = [
    ("cw-elevator-application-common", "cw-elevator-application-common"),
    ("cw-elevator-application-data", "cw-elevator-application-data"),
    ("cw-elevator-application-service", "cw-elevator-application-service"),
    ("cw-elevator-application-web", "cw-elevator-application-web"),
]
@dataclass
class ClassCompare:
    """Comparison result for one class of one module (see compare_module)."""

    # Module name the class belongs to.
    module: str
    # Fully qualified, dot-separated class name.
    class_name: str
    # One of ONLY_V1, ONLY_V2, MATCH, IMPL_DIFF, API_DIFF.
    status: str
    # The fields below stay None when the class exists on one side only.
    api_equal: bool | None = None
    code_equal: bool | None = None
    # SHA-256 hex digests of the normalized javap output for each side.
    api_hash_v1: str | None = None
    api_hash_v2: str | None = None
    code_hash_v1: str | None = None
    code_hash_v2: str | None = None
def shasum(text: str) -> str:
    """Return the SHA-256 hex digest of *text* encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
def run(cmd: list[str]) -> str:
    """
    Run *cmd* and return its standard output as text.

    Standard error is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        text=True,
        check=True,
    )
    return completed.stdout
def jar_classes(jar_file: Path) -> set[str]:
    """
    List the comparable classes of *jar_file* as dot-separated names.

    Only ``.class`` entries under ``cn/cloudwalk/elevator/`` are kept.
    Entries containing ``$`` (inner classes) are skipped for now because
    comparing them separately is noisy.
    """
    suffix = ".class"
    listing = run(["jar", "tf", str(jar_file)])
    return {
        entry[: -len(suffix)].replace("/", ".")
        for entry in listing.splitlines()
        if entry.endswith(suffix)
        and "$" not in entry
        and entry.startswith("cn/cloudwalk/elevator/")
    }
def norm_api(text: str) -> str:
    """
    Normalize ``javap -p -s`` output into an order-independent signature list.

    Member declaration lines and ``descriptor:`` lines are kept; each
    ``descriptor:`` line is joined (tab-separated) to the line before it and
    the joined entries are sorted, so that a change in declaration order
    alone (Lombok / compiler) is not reported as an API change.
    """
    ignored_prefixes = ("Compiled from", "Classfile ", "flags:")
    member_prefixes = ("descriptor:", "public ", "protected ", "private ")

    relevant: list[str] = []
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith(ignored_prefixes):
            continue
        if line.startswith(member_prefixes) or line.endswith(";"):
            relevant.append(line)

    entries: list[str] = []
    consumed_as_descriptor = False
    for position, line in enumerate(relevant):
        if consumed_as_descriptor:
            # Already attached to the previous entry.
            consumed_as_descriptor = False
            continue
        follower = relevant[position + 1] if position + 1 < len(relevant) else None
        if follower is not None and follower.startswith("descriptor:"):
            entries.append(f"{line}\t{follower}")
            consumed_as_descriptor = True
        else:
            entries.append(line)

    return "\n".join(sorted(entries))
def norm_code(text: str) -> str:
    """
    Normalize ``javap -c -p`` output line by line, then apply ``norm_code_post``.

    Unstable text is removed or replaced by placeholders: file headers,
    exception tables, instruction offsets, constant-pool indexes, jump
    targets, local-variable slots and switch layout numbers.

    Returns the normalized text, ready to be hashed or diffed.
    """
    out = []
    skip_exc = False
    for ln in text.splitlines():
        s = ln.rstrip()
        if not s:
            continue
        if s.startswith(("Compiled from", "Classfile ")):
            continue
        # Exception tables differ only in offsets; skip the whole block so it
        # is not reported as IMPL_DIFF.
        if "Exception table:" in s:
            skip_exc = True
            continue
        if skip_exc:
            # The table ends at the next member-level line. In javap output
            # member declarations are indented by two spaces and the closing
            # brace by none, while table rows are indented deeper. Checking
            # only for public/private/protected declarations (as before)
            # swallowed any following package-private method, `static {}`
            # initializer or closing brace.
            if len(s) - len(s.lstrip()) <= 2:
                skip_exc = False
            else:
                continue
        # Drop the instruction offset and constant-pool index; keep the
        # opcode and the call target.
        s = re.sub(r"^\s*\d+:\s*", "", s)
        s = re.sub(r"#\d+", "#", s)
        # javap pads the constant-pool comment with a varying number of
        # spaces; squeeze it to a canonical form.
        s = re.sub(r"#\s+//", "# //", s)
        # Hide the argument-count operand of invokeinterface.
        s = re.sub(r",\s+\d+\s+//", " //", s)
        # Jump target offsets are layout only.
        s = re.sub(
            r"\b(if(?:eq|ne|lt|gt|le|ge|null|nonnull)|if_icmp(?:eq|ne|lt|gt|le|ge)|if_acmp(?:eq|ne)|goto|jsr)\s+\d+\b",
            r"\1 @",
            s,
        )
        # Local-variable slot numbers can be reassigned by the compiler.
        s = re.sub(
            r"\b([adfil]load|[adfil]store|aload|astore|iload|istore|lload|lstore|fload|fstore|dload|dstore)\s+\d+\b",
            r"\1 _",
            s,
        )
        # Same for the short forms such as aload_0 / istore_3.
        s = re.sub(r"\b([adfil]load|[adfil]store|aload|astore|iload|istore|lload|lstore|fload|fstore|dload|dstore)_[0-3]\b", r"\1 _", s)
        # javap prints `iinc slot, const` (with a comma); hide the slot.
        s = re.sub(r"\biinc\s+\d+\s*,\s*(-?\d+)\b", r"iinc _ \1", s)
        # ArrayList.add/size called on the concrete class or through the List
        # interface are treated as the same call.
        s = re.sub(
            r"invokevirtual\s+#\s+//\s*Method java/util/ArrayList\.(add|size)\b[^\n]*",
            r"invokeinterface # // InterfaceMethod java/util/List.\1:(UNKNOWN)",
            s,
        )
        s = re.sub(
            r"invokeinterface\s+#\s+//\s*InterfaceMethod java/util/List\.(add|size)\b[^\n]*",
            r"invokeinterface # // InterfaceMethod java/util/List.\1:(UNKNOWN)",
            s,
        )
        # After the offset prefix is removed, tableswitch/lookupswitch leave
        # bare-number lines and `default: <offset>` lines that only reflect
        # layout.
        st = s.strip()
        if re.fullmatch(r"\d+", st):
            s = "<switch_target>"
        elif re.fullmatch(r"default:\s*\d+", st):
            s = "default: <switch_target>"
        out.append(s.strip())
    return norm_code_post("\n".join(out))
def javap_api(jar_file: Path, class_name: str) -> str:
    """Return the ``javap -p -s`` output for *class_name* looked up in *jar_file*."""
    command = ["javap", "-classpath", str(jar_file)]
    command += ["-p", "-s", class_name]
    return run(command)
def javap_code(jar_file: Path, class_name: str) -> str:
    """Return the ``javap -p -c`` output for *class_name* looked up in *jar_file*."""
    command = ["javap", "-classpath", str(jar_file)]
    command += ["-p", "-c", class_name]
    return run(command)
def compare_module(module: str, v1_jar: Path, v2_jar: Path) -> list[ClassCompare]:
    """
    Compare every class found in *v1_jar* and/or *v2_jar*.

    Classes present on one side only get status ``ONLY_V1`` / ``ONLY_V2``.
    For the others the normalized API and code texts are hashed:
    both equal -> ``MATCH``; API equal but code different -> ``IMPL_DIFF``;
    API different -> ``API_DIFF``. Records are ordered by class name.
    """
    old_classes = jar_classes(v1_jar)
    new_classes = jar_classes(v2_jar)
    results: list[ClassCompare] = []

    for name in sorted(old_classes | new_classes):
        in_old = name in old_classes
        in_new = name in new_classes
        if not (in_old and in_new):
            presence = "ONLY_V1" if in_old else "ONLY_V2"
            results.append(ClassCompare(module=module, class_name=name, status=presence))
            continue

        jars = (v1_jar, v2_jar)
        api_old, api_new = [shasum(norm_api(javap_api(jar, name))) for jar in jars]
        code_old, code_new = [shasum(norm_code(javap_code(jar, name))) for jar in jars]
        same_api = api_old == api_new
        same_code = code_old == code_new

        if not same_api:
            verdict = "API_DIFF"
        elif same_code:
            verdict = "MATCH"
        else:
            verdict = "IMPL_DIFF"

        results.append(
            ClassCompare(
                module=module,
                class_name=name,
                status=verdict,
                api_equal=same_api,
                code_equal=same_code,
                api_hash_v1=api_old,
                api_hash_v2=api_new,
                code_hash_v1=code_old,
                code_hash_v2=code_new,
            )
        )
    return results
def main() -> int:
    """
    Compare the V1 and V2 jars of every module in ``PAIR`` and write the
    JSON and Markdown root-cause reports.

    Exits with ``SystemExit`` when a jar is missing. Returns 0 on success.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--out-md",
        default=str(REPO_ROOT / "tools/elevator_api_parity/report/V1-V2-BYTECODE-ROOTCAUSE.md"),
    )
    ap.add_argument(
        "--out-json",
        default=str(REPO_ROOT / "tools/elevator_api_parity/report/V1-V2-BYTECODE-ROOTCAUSE.json"),
    )
    args = ap.parse_args()

    all_records: list[ClassCompare] = []
    for mod_short, mod_art in PAIR:
        v1_jar = V1_ROOT / f"{mod_art}-1.0-SNAPSHOT.jar"
        v2_jar = REPO_ROOT / mod_short / "target" / f"{mod_art}-2.0-SNAPSHOT.jar"
        if not v1_jar.is_file() or not v2_jar.is_file():
            raise SystemExit(f"missing jar for module {mod_short}: v1={v1_jar} v2={v2_jar}")
        all_records.extend(compare_module(mod_short, v1_jar=v1_jar, v2_jar=v2_jar))

    # Status counts overall and per module.
    counter = Counter(r.status for r in all_records)
    mod_counter: dict[str, Counter] = defaultdict(Counter)
    for r in all_records:
        mod_counter[r.module][r.status] += 1

    payload = {
        "generated_at_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "summary": dict(counter),
        "module_summary": {k: dict(v) for k, v in mod_counter.items()},
        "records": [r.__dict__ for r in all_records],
    }
    out_json = Path(args.out_json)
    out_json.parent.mkdir(parents=True, exist_ok=True)
    out_json.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    md = []
    md.append("# V1 vs V2 字节码根因分析")
    md.append("")
    md.append(f"- 生成时间(UTC):`{payload['generated_at_utc']}`")
    md.append("- 对比范围:四模块 `cn/cloudwalk/elevator/**` 主类(不含 `$` 内部类)")
    md.append("- 维度:类存在性 / API签名 / 方法实现")
    md.append("")
    md.append("## 1. 总览")
    md.append("")
    md.append("| 维度 | 数量 |")
    md.append("|---|---:|")
    for k in ["MATCH", "IMPL_DIFF", "API_DIFF", "ONLY_V1", "ONLY_V2"]:
        md.append(f"| {k} | {counter.get(k, 0)} |")
    md.append("")
    md.append("## 2. 各模块分布")
    md.append("")
    md.append("| 模块 | MATCH | IMPL_DIFF | API_DIFF | ONLY_V1 | ONLY_V2 |")
    md.append("|---|---:|---:|---:|---:|---:|")
    for m in [p[0] for p in PAIR]:
        c = mod_counter[m]
        md.append(
            f"| {m} | {c.get('MATCH',0)} | {c.get('IMPL_DIFF',0)} | {c.get('API_DIFF',0)} | {c.get('ONLY_V1',0)} | {c.get('ONLY_V2',0)} |"
        )
    md.append("")

    def section(title: str, status: str, limit: int = 120) -> None:
        """Append a table of the classes with *status*, capped at *limit* rows."""
        md.append(f"## {title}")
        md.append("")
        rs = [r for r in all_records if r.status == status]
        if not rs:
            md.append("- 无")
            md.append("")
            return
        md.append("| 模块 | 类 |")
        md.append("|---|---|")
        for r in rs[:limit]:
            md.append(f"| {r.module} | `{r.class_name}` |")
        if len(rs) > limit:
            md.append(f"| ... | 其余 {len(rs)-limit} 个见 JSON |")
        md.append("")

    section("3. API 相同但实现不同(优先查 JD-GUI 还原误差/人工改写)", "IMPL_DIFF")
    section("4. API 不同(优先查源码结构漂移)", "API_DIFF")
    section("5. 仅 V2 存在(新增类)", "ONLY_V2")
    section("6. 仅 V1 存在(缺失类)", "ONLY_V1")

    md.append("## 7. 根因判读指引")
    md.append("")
    md.append("- `IMPL_DIFF`:若对应源码与 CFR 文本高度相似但字节码仍不同,多为 JD-GUI 语句重排/条件翻译偏差或后续手工改写。")
    md.append("- `API_DIFF`:优先判定为“源码结构变化”,例如方法签名、字段类型、泛型边界变化。")
    md.append("- `ONLY_V2/ONLY_V1`:属于文件集合漂移(新增/漏还原/清理)。")
    md.append("")
    md.append("## 8. API_DIFF 判读")
    md.append("")
    md.append("- **成员顺序**:同签名不同顺序(常见于 Lombok getter/setter 顺序)已由 `norm_api` 合并 descriptor 后排序消除伪差异。")
    md.append("- **默认可视构造**:仅含 static 的类若未写显式私有空参构造,javac 可能生成 **public** 默认构造,与旧 JAR 中 **private** 不同;可在源码中补充与 V1 一致。")
    md.append("- **真实契约差异**:仍报 API_DIFF 时多为 V2 **新增/改签名** 的方法或字段,需对照业务是否保留(并非脚本噪声)。")
    md.append("")
    md.append("## 9. 实现归一化(摘要)")
    md.append("")
    md.append("- **行级**:指令偏移、`#` 常量池下标、`if_icmp*`/`goto` 目标、`xload`/`xstore` 槽位、`iinc`、"
              "`ArrayList`→`List` 的 add/size、`tableswitch` 纯数字行等。")
    md.append("- **二次(`bytecode_norm_post`**`ifxx`+`goto` 蹦床→反条件单条;连续 `goto @` 折叠;"
              "连续重复 `checkcast` 去重;`String.join` 前冗余 `checkcast java/lang/String`"
              "`iconst_0`+dup 与 istore/iload 轮换初始化局部;Apache POI `setCellType`/`setBold`/`setColor` 新旧 API 占位符。")
    md.append("")

    # Create the Markdown target directory too: --out-md may point somewhere
    # other than the JSON directory created above.
    out_md = Path(args.out_md)
    out_md.parent.mkdir(parents=True, exist_ok=True)
    out_md.write_text("\n".join(md), encoding="utf-8")
    print(f"WROTE: {args.out_md}")
    print(f"WROTE: {args.out_json}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import difflib
import json
import subprocess
from datetime import datetime
from pathlib import Path
from bytecode_parity_rootcause import norm_code
# Repository root: the 4th ancestor directory of this script.
ROOT = Path(__file__).resolve().parents[3]
# JSON report that is read for the IMPL_DIFF records.
REPORT = ROOT / "tools/elevator_api_parity/report/V1-V2-BYTECODE-ROOTCAUSE.json"
# Output directory for the per-class evidence files.
OUT = ROOT / "tools/elevator_api_parity/report/impl-diff-evidence-20260428"
# Directory holding the original V1 jars, a sibling of the repository root.
V1 = ROOT.parent / "cw-elevator-application-V1.0.0.20211103/cw_lib"
# Module name -> jar artifact base name.
PAIR = {
    "cw-elevator-application-common": "cw-elevator-application-common",
    "cw-elevator-application-data": "cw-elevator-application-data",
    "cw-elevator-application-service": "cw-elevator-application-service",
    "cw-elevator-application-web": "cw-elevator-application-web",
}
def run(cmd: list[str]) -> str:
    """
    Execute *cmd* and return what it wrote to standard output, as text.

    Standard error is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    finished = subprocess.run(
        cmd,
        check=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    return finished.stdout
def main() -> int:
    """
    Export evidence files for every ``IMPL_DIFF`` record of the report.

    For each record the normalized ``javap -c`` text of the V1 and V2 class
    and their unified diff are written into ``OUT``, followed by an
    ``INDEX.json`` listing all generated files. Returns 0.
    """
    # Local import: the module header only imports `datetime`.
    from datetime import timezone

    OUT.mkdir(parents=True, exist_ok=True)
    data = json.loads(REPORT.read_text(encoding="utf-8"))
    records = [r for r in data["records"] if r["status"] == "IMPL_DIFF"]
    index = []
    for r in records:
        module = r["module"]
        art = PAIR[module]
        cls = r["class_name"]
        v1_jar = V1 / f"{art}-1.0-SNAPSHOT.jar"
        v2_jar = ROOT / module / "target" / f"{art}-2.0-SNAPSHOT.jar"
        raw1 = run(["javap", "-classpath", str(v1_jar), "-p", "-c", cls])
        raw2 = run(["javap", "-classpath", str(v2_jar), "-p", "-c", cls])
        n1 = norm_code(raw1)
        n2 = norm_code(raw2)
        cls_file = cls.replace(".", "_")
        p1 = OUT / f"{cls_file}.v1.javap.txt"
        p2 = OUT / f"{cls_file}.v2.javap.txt"
        pd = OUT / f"{cls_file}.diff.txt"
        p1.write_text(n1, encoding="utf-8")
        p2.write_text(n2, encoding="utf-8")
        # The normalized text has no trailing newline, so diffing
        # keepends=True lines and joining with "" glued the last line onto
        # the next diff line. Diff newline-free lines instead and join them
        # explicitly.
        diff_rows = list(
            difflib.unified_diff(
                n1.splitlines(),
                n2.splitlines(),
                fromfile=f"{cls}.v1",
                tofile=f"{cls}.v2",
                n=3,
                lineterm="",
            )
        )
        diff = "\n".join(diff_rows) + ("\n" if diff_rows else "")
        pd.write_text(diff, encoding="utf-8")
        index.append(
            {
                "module": module,
                "class_name": cls,
                "v1_file": str(p1),
                "v2_file": str(p2),
                "diff_file": str(pd),
                "diff_lines": len(diff_rows),
            }
        )
    idx = {
        # Same naive-ISO-plus-"Z" format as before, without the deprecated
        # datetime.utcnow().
        "generated_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "count": len(index),
        "items": index,
    }
    (OUT / "INDEX.json").write_text(json.dumps(idx, ensure_ascii=False, indent=2), encoding="utf-8")
    print(str(OUT))
    print(f"IMPL_DIFF files: {len(index)}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,187 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
将 Maven 模块 `src/main/java/cn/cloudwalk/elevator` 与 V1 CFR 反编译树(decompile_v1_elevator_jars.sh 输出)按类文件路径配对,写相似度报告。
环境变量:
V1_DECOMP_ROOT 默认: <maven根>/tools/v1-decompiled/cfr-from-cw-lib-current
MV_ROOT 默认: 本脚本上溯 3 级到 maven-cw-elevator-application
OUT_MD 默认: tools/elevator_api_parity/report/SOURCE-PARITY-BY-CLASS-decomp.md
"""
from __future__ import annotations
import difflib
import os
import re
import sys
from pathlib import Path
# Absolute path of this script.
SCR = Path(__file__).resolve()
# scripts -> elevator_api_parity -> tools -> maven-cw-elevator-application
# (overridable through the MV_ROOT environment variable)
MV_ROOT = Path(os.environ.get("MV_ROOT", SCR.parents[3]))
# Default V1 decompile root, used when V1_DECOMP_ROOT is not set.
_DEFAULT_V1 = MV_ROOT / "tools" / "v1-decompiled" / "cfr-from-cw-lib-current"
# Maven module name -> V1 decompiled directory name (the sub-directory
# prefix produced by the decompile script)
_PAIR = [
    ("cw-elevator-application-common", "cw-elevator-application-common-1.0-SNAPSHOT"),
    ("cw-elevator-application-data", "cw-elevator-application-data-1.0-SNAPSHOT"),
    ("cw-elevator-application-service", "cw-elevator-application-service-1.0-SNAPSHOT"),
    ("cw-elevator-application-web", "cw-elevator-application-web-1.0-SNAPSHOT"),
]
# Matches, in priority order at each position: a string literal, a char
# literal, a block comment, a line comment. Literals are matched only so that
# comment markers inside them are not mistaken for comments.
_JAVA_LITERAL_OR_COMMENT = re.compile(
    r'"(?:\\.|[^"\\\n])*"'
    r"|'(?:\\.|[^'\\\n])*'"
    r"|/\*[\s\S]*?\*/"
    r"|//[^\n]*"
)


def strip_java_comments(text: str) -> str:
    """
    Remove Java comments from *text* and squeeze the layout.

    Block and line comments are deleted; ``//`` or ``/* */`` sequences inside
    string or character literals are left alone (previously a literal such as
    ``"http://host"`` was cut at the ``//``). Every remaining line is
    stripped, empty lines are dropped, and the lines are joined with ``\\n``.
    """

    def keep_literal(match: re.Match) -> str:
        token = match.group(0)
        # Literals start with a quote; everything else matched is a comment.
        return token if token[0] in "\"'" else ""

    without_comments = _JAVA_LITERAL_OR_COMMENT.sub(keep_literal, text)
    lines = [ln.strip() for ln in without_comments.splitlines() if ln.strip()]
    return "\n".join(lines)
def similarity(a: str, b: str) -> float:
    """Return ``SequenceMatcher.ratio()`` of *a* and *b* after comment stripping."""
    left = strip_java_comments(a)
    right = strip_java_comments(b)
    matcher = difflib.SequenceMatcher(None, left, right)
    return matcher.ratio()
def tag_only_mv(rel: str) -> str:
    """
    Return a short label for a Maven-only file, or ``""`` when no rule applies.

    Rules are checked in order and the first match wins.
    """
    rules = (
        (lambda p: "TenantVisitorFloor" in p, "租户访客楼层策略(V2)"),
        (lambda p: p.endswith("package-info.java"), "package-info"),
        (lambda p: "CloudwalkSessionHolderConfiguration" in p, "Session 扩展"),
        (lambda p: "ElevatorRemoteIo" in p, "远程 IO 池"),
        (
            lambda p: "DavinciStorageBeansConfiguration" in p or "OpenFeignFileStorageManager" in p,
            "Davinci 存储",
        ),
        (lambda p: "ZoneTreeCollectors" in p, "工具类"),
    )
    for applies, label in rules:
        if applies(rel):
            return label
    return ""
def main() -> int:
    """
    Pair Maven sources with the V1 decompiled tree per module and write a
    Markdown similarity report.

    Reads the V1_DECOMP_ROOT and OUT_MD environment variables (with
    defaults). Returns 1 when the V1 decompile directory is missing,
    otherwise 0.
    """
    v1_root_parent = Path(os.environ.get("V1_DECOMP_ROOT", _DEFAULT_V1)).resolve()
    if not v1_root_parent.is_dir():
        print(f"ERROR: V1 decompile dir missing: {v1_root_parent}", file=sys.stderr)
        print("Run: maven-cw-elevator-application/scripts/decompile_v1_elevator_jars.sh", file=sys.stderr)
        return 1
    out_md = Path(
        os.environ.get(
            "OUT_MD",
            MV_ROOT / "tools/elevator_api_parity/report/SOURCE-PARITY-BY-CLASS-decomp.md",
        )
    )
    lines: list[str] = []
    lines.append("# Maven 源码 vs V1 自动反编译(cw_lib jar / CFR)逐类比对")
    lines.append("")
    lines.append(f"- **V1 根**: `{v1_root_parent}``decompile_v1_elevator_jars.sh` 生成)")
    lines.append(f"- **V2 根**: `{MV_ROOT}/{{module}}/src/main/java/cn/cloudwalk/elevator`")
    lines.append("- **相似度**: 去注释后 `SequenceMatcher.ratio`CFR 噪声会导致分数偏低)")
    lines.append("")
    # (module/relative path, similarity) for every paired file.
    all_rows: list[tuple[str, float]] = []
    # Totals across modules; keys are short forms of the per-module counters.
    tot = {"paired": 0, "ge98": 0, "85": 0, "lt": 0, "o1": 0, "o2": 0}
    summary_rows: list[str] = []
    for mod_mvn, mod_v1dir in _PAIR:
        v1_elev = v1_root_parent / mod_v1dir / "cn/cloudwalk/elevator"
        mv_elev = MV_ROOT / mod_mvn / "src/main/java/cn/cloudwalk/elevator"
        # A module missing on either side gets a placeholder row and is skipped.
        if not v1_elev.is_dir():
            summary_rows.append(f"| {mod_mvn} | — | — | **V1 侧目录不存在**: `{v1_elev}` |")
            continue
        if not mv_elev.is_dir():
            summary_rows.append(f"| {mod_mvn} | — | — | **Maven 侧目录不存在** |")
            continue
        v1_files = {str(p.relative_to(v1_elev)).replace("\\", "/"): p for p in v1_elev.rglob("*.java")}
        mv_files = {str(p.relative_to(mv_elev)).replace("\\", "/"): p for p in mv_elev.rglob("*.java")}
        cnt = {"paired": 0, "ge98": 0, "85_98": 0, "lt85": 0, "only_v1": 0, "only_mv": 0}
        for rel in sorted(set(v1_files) | set(mv_files)):
            p1, pm = v1_files.get(rel), mv_files.get(rel)
            if p1 and pm:
                cnt["paired"] += 1
                try:
                    s = similarity(
                        p1.read_text(encoding="utf-8", errors="replace"),
                        pm.read_text(encoding="utf-8", errors="replace"),
                    )
                except OSError:
                    # An unreadable file counts as completely different.
                    s = 0.0
                if s >= 0.98:
                    cnt["ge98"] += 1
                elif s >= 0.85:
                    cnt["85_98"] += 1
                else:
                    cnt["lt85"] += 1
                all_rows.append((f"{mod_mvn}/{rel}", s))
            elif p1:
                cnt["only_v1"] += 1
            else:
                cnt["only_mv"] += 1
        tot["paired"] += cnt["paired"]
        tot["ge98"] += cnt["ge98"]
        tot["85"] += cnt["85_98"]
        tot["lt"] += cnt["lt85"]
        tot["o1"] += cnt["only_v1"]
        tot["o2"] += cnt["only_mv"]
        summary_rows.append(
            f"| {mod_mvn} | {cnt['paired']} | {cnt['ge98']} | {cnt['85_98']} | {cnt['lt85']} | "
            f"{cnt['only_v1']} | {cnt['only_mv']} |"
        )
    lines.append("## 1. 汇总(四模块来自 cw_lib jar 反编译)")
    lines.append("")
    lines.append(
        "| 模块 | 配对 | ≥0.98 | 0.850.98 | <0.85 | 仅 V1 | 仅 Maven |"
    )
    lines.append("|------|------|-------|-----------|-------|-------|------------|")
    lines.extend(summary_rows)
    lines.append(
        f"| **合计** | **{tot['paired']}** | **{tot['ge98']}** | **{tot['85']}** | **{tot['lt']}** | **{tot['o1']}** | **{tot['o2']}** |"
    )
    lines.append("")
    # Maven-only files
    lines.append("## 2. 仅 Maven(相对 `cn/cloudwalk/elevator`")
    lines.append("")
    for mod_mvn, mod_v1dir in _PAIR:
        v1_elev = v1_root_parent / mod_v1dir / "cn/cloudwalk/elevator"
        mv_elev = MV_ROOT / mod_mvn / "src/main/java/cn/cloudwalk/elevator"
        if not mv_elev.is_dir() or not v1_elev.is_dir():
            continue
        v1_set = {str(p.relative_to(v1_elev)).replace("\\", "/") for p in v1_elev.rglob("*.java")}
        extras = []
        for p in sorted(mv_elev.rglob("*.java")):
            rel = str(p.relative_to(mv_elev)).replace("\\", "/")
            if rel not in v1_set:
                tg = tag_only_mv(rel)
                extras.append(f"- `{rel}`" + (f" — *{tg}*" if tg else ""))
        if extras:
            lines.append(f"### {mod_mvn}")
            lines.extend(extras)
            lines.append("")
    # Samples: the 35 lowest and the 15 highest similarity scores.
    worst = sorted(all_rows, key=lambda x: x[1])[:35]
    best = sorted(all_rows, key=lambda x: -x[1])[:15]
    lines.append("## 3. 样本:最高 / 最低相似度路径")
    lines.append("")
    lines.append("### 最高")
    for path, s in best:
        lines.append(f"- `{path}` **{s:.4f}**")
    lines.append("")
    lines.append("### 最低(优先 IDE diff")
    for path, s in worst:
        lines.append(f"- `{path}` **{s:.4f}**")
    out_md.parent.mkdir(parents=True, exist_ok=True)
    out_md.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(out_md)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,266 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
V1CFR 反编译 cw_lib vs V2(Maven 源码)全量类级比对,输出:
- Markdown 报告(汇总 + 分桶完整清单 + 问题梳理)
- 可选 JSON--json
环境变量同 source_parity_scan.pyV1_DECOMP_ROOT、MV_ROOT
"""
from __future__ import annotations
import argparse
import difflib
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
# Absolute path of this script.
SCR = Path(__file__).resolve()
# Maven project root: the MV_ROOT environment variable when set, otherwise
# the 4th ancestor directory of this script.
MV_ROOT = Path(os.environ.get("MV_ROOT", SCR.parents[3]))
# Default V1 decompile root, used when V1_DECOMP_ROOT is not set.
_DEFAULT_V1 = MV_ROOT / "tools" / "v1-decompiled" / "cfr-from-cw-lib-current"
# (Maven module name, V1 decompiled directory name) pairs.
_PAIR = [
    ("cw-elevator-application-common", "cw-elevator-application-common-1.0-SNAPSHOT"),
    ("cw-elevator-application-data", "cw-elevator-application-data-1.0-SNAPSHOT"),
    ("cw-elevator-application-service", "cw-elevator-application-service-1.0-SNAPSHOT"),
    ("cw-elevator-application-web", "cw-elevator-application-web-1.0-SNAPSHOT"),
]
# Matches, in priority order at each position: a string literal, a char
# literal, a block comment, a line comment. Literals are matched only so that
# comment markers inside them are not mistaken for comments.
_JAVA_LITERAL_OR_COMMENT = re.compile(
    r'"(?:\\.|[^"\\\n])*"'
    r"|'(?:\\.|[^'\\\n])*'"
    r"|/\*[\s\S]*?\*/"
    r"|//[^\n]*"
)


def strip_java_comments(text: str) -> str:
    """
    Remove Java comments from *text* and squeeze the layout.

    Block and line comments are deleted; ``//`` or ``/* */`` sequences inside
    string or character literals are left alone (previously a literal such as
    ``"http://host"`` was cut at the ``//``). Every remaining line is
    stripped, empty lines are dropped, and the lines are joined with ``\\n``.
    """

    def keep_literal(match: re.Match) -> str:
        token = match.group(0)
        # Literals start with a quote; everything else matched is a comment.
        return token if token[0] in "\"'" else ""

    without_comments = _JAVA_LITERAL_OR_COMMENT.sub(keep_literal, text)
    lines = [ln.strip() for ln in without_comments.splitlines() if ln.strip()]
    return "\n".join(lines)
def similarity(a: str, b: str) -> float:
    """Return ``SequenceMatcher.ratio()`` of the two texts once comments are stripped."""
    stripped_a = strip_java_comments(a)
    stripped_b = strip_java_comments(b)
    return difflib.SequenceMatcher(None, stripped_a, stripped_b).ratio()
def tag_only_mv(rel: str) -> str:
    """
    Classify a Maven-only file by its relative path.

    Rules are checked in order and the first match wins; unmatched paths get
    the catch-all label.
    """
    rules = (
        (lambda p: "TenantVisitorFloor" in p, "V2扩展-租户访客楼层"),
        (lambda p: p.endswith("package-info.java"), "工程化-package-info"),
        (lambda p: "CloudwalkSessionHolderConfiguration" in p, "V2扩展-Session"),
        (lambda p: "ElevatorRemoteIo" in p, "V2扩展-远程IO线程池"),
        (
            lambda p: "DavinciStorageBeansConfiguration" in p or "OpenFeignFileStorageManager" in p,
            "V2扩展-Davinci存储",
        ),
        (lambda p: "ZoneTreeCollectors" in p, "V2独有-工具类"),
    )
    return next((label for applies, label in rules if applies(rel)), "V2独有-待分类")
def review_priority(rel: str) -> str:
    """
    Map a relative source path to a review-priority label.

    Backslashes are treated as ``/``. Rules are checked in order and the
    first match wins; unmatched paths fall into the generic medium bucket.
    """
    path = rel.replace("\\", "/")
    rules = (
        ("/controller/" in path or path.endswith("Controller.java"), "高-Controller"),
        ("FeignClient" in path or "/client/" in path, "高-Feign/Client"),
        ("ServiceImpl" in path or "/impl/" in path, "高-Service/DaoImpl"),
        ("/mapper/" in path or "Mapper.java" in path, "中-Mapper接口"),
        (
            any(part in path for part in ("/dto/", "/param/", "/form/", "/result/")),
            "中-DTO表单",
        ),
        ("/em/" in path or "Enum.java" in path, "低-枚举(CFR噪声常见)"),
        ("/util/" in path, "低-工具类"),
    )
    for matched, label in rules:
        if matched:
            return label
    return "中-其他"
def bucket(score: float) -> str:
    """Return the bucket label for *score*: A for >= 0.98, B for >= 0.85, else C."""
    floors = ((0.98, "A-高度一致"), (0.85, "B-轻度差异"))
    for floor, label in floors:
        if score >= floor:
            return label
    return "C-显著差异"
def main() -> int:
    """
    Compare every class of the V1 decompiled tree with the Maven sources and
    write the Markdown issue report (plus a JSON file when ``--json`` is set).

    Reads the V1_DECOMP_ROOT environment variable (with a default). Returns 1
    when the V1 decompile directory is missing, otherwise 0.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--json", action="store_true", help="额外写出 JSON")
    ap.add_argument(
        "--out",
        default=str(MV_ROOT / "tools/elevator_api_parity/report/V1-V2-DIFF-ISSUES.md"),
        help="Markdown 输出路径",
    )
    args = ap.parse_args()
    v1_root_parent = Path(os.environ.get("V1_DECOMP_ROOT", _DEFAULT_V1)).resolve()
    if not v1_root_parent.is_dir():
        print(f"ERROR: V1 decompile dir missing: {v1_root_parent}", file=sys.stderr)
        return 1
    # One record per relative path seen on either side.
    records: list[dict] = []
    # (module, relative path, tag) for files that exist in Maven only.
    only_mv: list[tuple[str, str, str]] = []
    totals = {"paired": 0, "A": 0, "B": 0, "C": 0, "only_v1": 0, "only_mv": 0}
    for mod_mvn, mod_v1dir in _PAIR:
        v1_elev = v1_root_parent / mod_v1dir / "cn/cloudwalk/elevator"
        mv_elev = MV_ROOT / mod_mvn / "src/main/java/cn/cloudwalk/elevator"
        # Modules missing on either side are skipped without a record.
        if not v1_elev.is_dir() or not mv_elev.is_dir():
            continue
        v1_files = {str(p.relative_to(v1_elev)).replace("\\", "/"): p for p in v1_elev.rglob("*.java")}
        mv_files = {str(p.relative_to(mv_elev)).replace("\\", "/"): p for p in mv_elev.rglob("*.java")}
        for rel in sorted(set(v1_files) | set(mv_files)):
            p1, pm = v1_files.get(rel), mv_files.get(rel)
            if p1 and pm:
                totals["paired"] += 1
                try:
                    t1 = p1.read_text(encoding="utf-8", errors="replace")
                    t2 = pm.read_text(encoding="utf-8", errors="replace")
                    s = similarity(t1, t2)
                except OSError:
                    # An unreadable file counts as completely different.
                    s = 0.0
                    t1 = t2 = ""
                bk = bucket(s)
                if bk.startswith("A"):
                    totals["A"] += 1
                elif bk.startswith("B"):
                    totals["B"] += 1
                else:
                    totals["C"] += 1
                pri = review_priority(rel)
                records.append(
                    {
                        "module": mod_mvn,
                        "relative_path": rel,
                        "similarity": round(s, 6),
                        "bucket": bk,
                        "review_priority": pri,
                        "v1_file": str(p1),
                        "v2_file": str(pm),
                    }
                )
            elif p1:
                totals["only_v1"] += 1
                records.append(
                    {
                        "module": mod_mvn,
                        "relative_path": rel,
                        "similarity": None,
                        "bucket": "ONLY_V1",
                        "review_priority": review_priority(rel),
                        "v1_file": str(p1),
                        "v2_file": None,
                    }
                )
            else:
                totals["only_mv"] += 1
                tg = tag_only_mv(rel)
                only_mv.append((mod_mvn, rel, tg))
                # For Maven-only files the tag is stored in review_priority.
                records.append(
                    {
                        "module": mod_mvn,
                        "relative_path": rel,
                        "similarity": None,
                        "bucket": "ONLY_MAVEN",
                        "review_priority": tg,
                        "v1_file": None,
                        "v2_file": str(pm),
                    }
                )
    out_md = Path(args.out)
    out_md.parent.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    md: list[str] = []
    md.append("# V1 vs V2 源码差异与问题梳理(全自动比对)")
    md.append("")
    md.append(f"- **生成时间(UTC**: {ts}")
    md.append(f"- **V1**: `{v1_root_parent}`cw_lib jar → CFR")
    md.append(f"- **V2**: `{MV_ROOT}/{{module}}/src/main/java/cn/cloudwalk/elevator`")
    md.append("- **相似度**: 去注释后全文 `SequenceMatcher.ratio`**低分≠业务错误**(CFR 变量名/枚举顺序等)。")
    md.append("")
    md.append("## 1. 结论摘要")
    md.append("")
    md.append("| 维度 | 数量 | 说明 |")
    md.append("|------|------|------|")
    md.append(f"| 配对类 | {totals['paired']} | 同名路径 `.java` |")
    md.append(f"| A 桶 ≥0.98 | {totals['A']} | 文本高度接近 |")
    md.append(f"| B 桶 0.850.98 | {totals['B']} | 多为格式/注释/轻微差异,建议抽检 |")
    md.append(f"| C 桶 <0.85 | {totals['C']} | **优先 IDE diff**;枚举/DTO 常为 CFR 噪声 |")
    md.append(f"| 仅 V1 | {totals['only_v1']} | 当前四模块应为 0 |")
    md.append(f"| 仅 Maven | {totals['only_mv']} | V2 新增;访客楼层策略等为**已知扩展** |")
    md.append("")
    md.append("### 1.1 建议处置")
    md.append("")
    md.append("1. **仅 Maven**`TenantVisitorFloor*` — 与 V1 对齐无关(产品已排除);`package-info` — 忽略。")
    md.append("2. **C 桶 + 高优先级**Controller / ServiceImpl / Feign):优先人工对照业务分支与远程调用。")
    md.append("3. **C 桶 + 枚举/DTO**:优先视为 CFR 噪声,除非接口契约变更。")
    md.append("")
    md.append("## 2. 仅 Maven 文件(问题/范围标注)")
    md.append("")
    for mod, rel, tg in sorted(only_mv, key=lambda x: (x[0], x[1])):
        md.append(f"- `{mod}/{rel}` — **{tg}**")
    md.append("")
    md.append("## 3. C 桶(<0.85)按复核优先级分组")
    md.append("")
    # Paired records below 0.85, grouped by review priority.
    c_records = [r for r in records if r.get("similarity") is not None and r["similarity"] < 0.85]
    by_pri: dict[str, list[dict]] = {}
    for r in c_records:
        by_pri.setdefault(r["review_priority"], []).append(r)
    for pri in sorted(by_pri.keys()):
        md.append(f"### {pri}")
        md.append("")
        md.append("| 路径 | 相似度 |")
        md.append("|------|--------|")
        # Highest similarity first within each group.
        for r in sorted(by_pri[pri], key=lambda x: -x["similarity"]):
            md.append(f"| `{r['module']}/{r['relative_path']}` | {r['similarity']:.4f} |")
        md.append("")
    md.append("## 4. 完整配对表(全部类)")
    md.append("")
    md.append("| 模块 | 相对路径 | 相似度 | 分桶 | 复核优先级 |")
    md.append("|------|----------|--------|------|------------|")
    for r in sorted(
        [x for x in records if x.get("similarity") is not None],
        key=lambda x: (x["module"], x["relative_path"]),
    ):
        md.append(
            f"| {r['module']} | `{r['relative_path']}` | {r['similarity']:.4f} | {r['bucket']} | {r['review_priority']} |"
        )
    md.append("")
    md.append("### 仅 V1 / 仅 Maven 行")
    md.append("")
    for r in records:
        if r["bucket"] in ("ONLY_V1", "ONLY_MAVEN"):
            md.append(f"- `{r['module']}/{r['relative_path']}` — **{r['bucket']}** — {r['review_priority']}")
    md.append("")
    md.append("## 5. 生成命令")
    md.append("")
    md.append("```bash")
    md.append("cd maven-cw-elevator-application")
    md.append("./scripts/decompile_v1_elevator_jars.sh # 若尚未反编译 V1 jar")
    md.append("python3 tools/elevator_api_parity/scripts/v1_v2_diff_issues_report.py")
    md.append("```")
    md.append("")
    out_md.write_text("\n".join(md) + "\n", encoding="utf-8")
    print(out_md)
    if args.json:
        # The JSON file sits next to the Markdown file, same stem.
        out_json = out_md.with_suffix(".json")
        out_json.write_text(json.dumps({"generated": ts, "totals": totals, "records": records}, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
        print(out_json)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())