#!/usr/bin/env python3
"""
文档交叉对比脚本
从 /home/liangzi/document/ 读取旧文档，提取关键信息进行交叉比对
被知识时光机脚本调用，输出结果写进矛盾提醒.md
"""

import os
import re
import sys


# Any directory whose path contains this marker is excluded from the scan.
_SKIP_MARKER = '知识时光机'

# Patterns are compiled once at import time rather than once per scanned file.
_IP_RE = re.compile(r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b')
_DOMAIN_RE = re.compile(
    r'([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.marschat\.online)'
)
_PWD_RE = re.compile(
    r'(?:密码|pass|password|passwd)[=:]\s*[`"\']?(\S{4,})',
    re.IGNORECASE,
)


def _is_ipv4(candidate):
    """Return True when every dotted octet of *candidate* is at most 255."""
    return all(int(octet) <= 255 for octet in candidate.split('.'))


def _summarize(content):
    """Return the '; '-joined findings for one document, or '' if none.

    Findings are, in order: the sorted set of IPv4 addresses, the sorted set
    of ``*.marschat.online`` host names, and the number of password-like
    assignments (only the count is reported, never the matched value).
    """
    # The regex alone accepts things like 999.1.1.1; drop out-of-range octets.
    ips = {ip for ip in _IP_RE.findall(content) if _is_ipv4(ip)}
    domains = set(_DOMAIN_RE.findall(content))
    pwd_count = len(_PWD_RE.findall(content))

    parts = []
    if ips:
        parts.append('IP: ' + ', '.join(sorted(ips)))
    if domains:
        parts.append('域名: ' + ', '.join(sorted(domains)))
    if pwd_count > 0:
        parts.append('密码/凭证: {}处'.format(pwd_count))
    return '; '.join(parts)


def main(doc_dir='/home/liangzi/document', verify_file=None):
    """Scan Markdown documents under *doc_dir* and report key details found.

    Every ``.md`` file below *doc_dir* (except inside directories whose path
    contains the skip marker) is searched for IPv4 addresses,
    ``*.marschat.online`` host names and password-like assignments. One
    summary line per document with findings is printed to stdout and, when a
    verify file is given, appended to that file as a Markdown bullet.

    Args:
        doc_dir: Root directory to scan. Defaults to the original hard-coded
            location.
        verify_file: Path of the file to append result bullets to. ``None``
            (the default) means "take it from ``sys.argv[1]`` if present";
            an empty string disables file output.

    Raises:
        OSError: If the verify file cannot be opened for appending.
    """
    if verify_file is None:
        verify_file = sys.argv[1] if len(sys.argv) > 1 else ''

    print('旧文档交叉对比结果:')
    print('扫描目录: {}'.format(doc_dir))
    print()

    verify_lines = []
    for root, dirs, files in os.walk(doc_dir):
        # Assigning in place prunes excluded subtrees and fixes the traversal
        # order, so the report is deterministic across filesystems.
        dirs[:] = sorted(d for d in dirs if _SKIP_MARKER not in d)
        if _SKIP_MARKER in root:
            continue
        for name in sorted(files):
            if not name.endswith('.md'):
                continue
            path = os.path.join(root, name)
            rel = os.path.relpath(path, doc_dir)
            try:
                # Explicit UTF-8 keeps the scan independent of the process
                # locale; errors='replace' lets a file in another encoding
                # still be searched for its ASCII content instead of being
                # dropped.
                with open(path, 'r', encoding='utf-8', errors='replace') as fp:
                    content = fp.read()
            except OSError as exc:
                # Still best-effort, but no longer silent.
                print(
                    '  [跳过] 无法读取 {}: {}'.format(rel, exc),
                    file=sys.stderr,
                )
                continue

            detail_str = _summarize(content)
            if not detail_str:
                continue
            print('  📄 {} | {}'.format(rel, detail_str))
            verify_lines.append(
                '- 🟡 文档交叉: {} | {}\n'.format(rel, detail_str)
            )

    # Append all bullets with a single open; the file is not created or
    # touched when there is nothing to report.
    if verify_file and verify_lines:
        with open(verify_file, 'a', encoding='utf-8') as vf:
            vf.writelines(verify_lines)

    print()
    print('✅ 文档交叉对比完成')


# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
