#!/usr/bin/env python3
"""
雙儲存研究筆記同步檢查工具
檢查所有啟用 dual_storage 的筆記是否需要同步

使用方式：
    python check_sync.py
    python check_sync.py --verbose
"""
import sys
sys.stdout.reconfigure(encoding='utf-8')

import re
import yaml
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, List, Tuple

# Settings
# Root directory of the Research_zoo notes; scan_research_zoo() searches it
# recursively for *.md files.
RESEARCH_ZOO_PATH = Path(r"C:\Users\User\Documents\GitHub\Research_zoo\research")
# Maps a project name to that project's research directory.
# NOTE(review): nothing visible in this file reads PROJECTS — confirm whether
# it is still needed or is used by another module.
PROJECTS = {
    "real-estate-advisor": Path(r"C:\Users\User\Documents\GitHub\real-estate-advisor\research")
}


def extract_yaml_frontmatter(content: str) -> Optional[Dict]:
    """Extract the YAML frontmatter mapping from Markdown text.

    The frontmatter is the text between a ``---`` line at the very start of
    *content* and the next ``---`` that follows a line break. A leading
    UTF-8 byte-order mark and CRLF line endings are tolerated.

    Args:
        content: Full text of a Markdown document.

    Returns:
        The parsed frontmatter when it is a YAML mapping. ``None`` when there
        is no frontmatter, the YAML cannot be parsed, or it parses to
        something other than a mapping (scalar, list, or nothing).
    """
    # Text decoded as plain 'utf-8' keeps a BOM as U+FEFF, which would
    # otherwise stop the pattern from matching at position 0.
    text = content[1:] if content.startswith('\ufeff') else content
    match = re.match(r'^---\r?\n(.*?)\r?\n---', text, re.DOTALL)
    if not match:
        return None
    try:
        parsed = yaml.safe_load(match.group(1))
    except yaml.YAMLError:
        return None
    # Callers use dict methods on the result, so anything that is not a
    # mapping is reported the same way as missing frontmatter.
    return parsed if isinstance(parsed, dict) else None


def check_file_sync(md_file: Path) -> Optional[Dict]:
    """Check the dual-storage sync state of a single Markdown note.

    Args:
        md_file: Path of the note to inspect.

    Returns:
        ``None`` when the note has no usable frontmatter or does not have
        ``dual_storage.enabled`` set to a truthy value. Otherwise a dict that
        always contains ``"file"`` and ``"status"``, where status is one of:

        * ``"read_error"``      - the note itself could not be read
        * ``"missing_link"``    - dual storage enabled but no ``linked_path``
        * ``"link_not_found"``  - ``linked_path`` does not exist
        * ``"link_read_error"`` - the linked file could not be read
        * ``"link_no_yaml"``    - the linked file has no frontmatter mapping
        * ``"out_of_sync"``     - this note's ``primary_version`` differs from
          the linked file's ``local_version``
        * ``"synced"``          - the two versions are equal

        Every status except ``"synced"`` also carries a ``"message"``.
    """
    try:
        content = md_file.read_text(encoding='utf-8')
    except Exception as e:
        # Best-effort scan: one unreadable note must not abort the whole run.
        # The result carries 'status' and 'message' like every other failure
        # so the report lists it; 'error' is kept as the legacy key.
        message = f"讀取失敗: {e}"
        return {
            "file": str(md_file),
            "status": "read_error",
            "error": message,
            "message": message,
        }

    metadata = extract_yaml_frontmatter(content)
    if not isinstance(metadata, dict):
        return None

    # 'dual_storage:' with an empty or scalar value parses to None/str/bool,
    # none of which support .get(); treat those as "not enabled".
    dual_storage = metadata.get('dual_storage')
    if not isinstance(dual_storage, dict) or not dual_storage.get('enabled'):
        return None

    linked_path = dual_storage.get('linked_path')
    if not linked_path:
        return {
            "file": str(md_file),
            "status": "missing_link",
            "message": "dual_storage 已啟用但缺少 linked_path"
        }

    # NOTE(review): a relative linked_path is resolved against the current
    # working directory, not against md_file's folder - confirm intended.
    linked_file = Path(linked_path)
    if not linked_file.exists():
        return {
            "file": str(md_file),
            "status": "link_not_found",
            "linked_path": linked_path,
            "message": "連結的檔案不存在"
        }

    # Read the linked file's metadata.
    try:
        linked_content = linked_file.read_text(encoding='utf-8')
        linked_metadata = extract_yaml_frontmatter(linked_content)
    except Exception as e:
        return {
            "file": str(md_file),
            "status": "link_read_error",
            "linked_path": linked_path,
            "message": f"讀取連結檔案失敗: {e}"
        }

    if not isinstance(linked_metadata, dict) or not linked_metadata:
        return {
            "file": str(md_file),
            "status": "link_no_yaml",
            "linked_path": linked_path,
            "message": "連結檔案沒有 YAML frontmatter"
        }

    # Same guard as above: a null/scalar 'dual_storage' in the linked file
    # falls back to an empty mapping so the default version applies.
    linked_dual = linked_metadata.get('dual_storage')
    if not isinstance(linked_dual, dict):
        linked_dual = {}

    # Compare this note's primary version with the linked file's local version.
    primary_version = dual_storage.get('primary_version', '0.0.0')
    local_version = dual_storage.get('local_version', '0.0.0')
    linked_local_version = linked_dual.get('local_version', '0.0.0')

    if primary_version != linked_local_version:
        return {
            "file": str(md_file),
            "status": "out_of_sync",
            "role": dual_storage.get('role'),
            "primary_version": primary_version,
            "local_version": local_version,
            "linked_local_version": linked_local_version,
            "linked_path": linked_path,
            "last_sync": dual_storage.get('last_sync'),
            "message": f"版本不同步: 主本 {primary_version} vs 副本 {linked_local_version}"
        }

    return {
        "file": str(md_file),
        "status": "synced",
        "primary_version": primary_version,
        "linked_path": linked_path,
        "last_sync": dual_storage.get('last_sync')
    }


def scan_research_zoo() -> List[Dict]:
    """Collect sync results for every Markdown note under RESEARCH_ZOO_PATH.

    Returns:
        The truthy results of check_file_sync() for each ``*.md`` file found
        recursively, in the order the files are discovered. Files for which
        check_file_sync() returns a falsy value are left out.
    """
    outcomes = (check_file_sync(note) for note in RESEARCH_ZOO_PATH.rglob("*.md"))
    return [outcome for outcome in outcomes if outcome]


def print_report(results: List[Dict], verbose: bool = False):
    """Print the sync status report to stdout.

    Args:
        results: Result dicts as produced by check_file_sync(). Each needs a
            ``"file"`` key; ``"status"`` selects the section it appears in.
        verbose: When true, also list the notes that are already in sync.

    Entries whose status is neither ``"synced"`` nor ``"out_of_sync"`` are
    reported as errors. That includes entries with no ``"status"`` key at all
    (for example a read failure described only by an ``"error"`` key), so the
    three counters always add up to the total.
    """
    def section(title: str) -> None:
        # Shared heading layout for the three detail sections.
        print("\n" + "-" * 40)
        print(title)
        print("-" * 40)

    synced = [r for r in results if r.get('status') == 'synced']
    out_of_sync = [r for r in results if r.get('status') == 'out_of_sync']
    errors = [r for r in results if r.get('status') not in ('synced', 'out_of_sync')]

    print("=" * 60)
    print("雙儲存研究筆記同步檢查報告")
    print(f"檢查時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    print(f"\n總計: {len(results)} 個啟用雙儲存的筆記")
    print(f"  - 已同步: {len(synced)}")
    print(f"  - 需同步: {len(out_of_sync)}")
    print(f"  - 錯誤: {len(errors)}")

    if out_of_sync:
        section("需要同步的筆記：")
        for item in out_of_sync:
            filename = Path(item['file']).name
            print(f"\n  [{item['role']}] {filename}")
            print(f"    主本版本: {item['primary_version']}")
            print(f"    副本版本: {item['linked_local_version']}")
            print(f"    上次同步: {item['last_sync']}")

    if errors:
        section("發生錯誤的筆記：")
        for item in errors:
            filename = Path(item['file']).name
            print(f"\n  {filename}")
            # Status-less entries may only carry an 'error' text, so fall
            # back to it instead of raising KeyError.
            print(f"    狀態: {item.get('status', 'unknown')}")
            print(f"    訊息: {item.get('message') or item.get('error', '')}")

    if verbose and synced:
        section("已同步的筆記：")
        for item in synced:
            filename = Path(item['file']).name
            print(f"  [OK] {filename} (v{item['primary_version']})")

    print("\n" + "=" * 60)


def main():
    """Command-line entry point: scan the notes, print the report, set the exit status.

    Accepts ``--verbose`` / ``-v`` to include already-synced notes in the
    report. Exits with status 1 when at least one note is out of sync;
    otherwise returns normally.
    """
    import argparse

    cli = argparse.ArgumentParser(description="雙儲存研究筆記同步檢查")
    cli.add_argument('--verbose', '-v', action='store_true', help="顯示詳細資訊")
    options = cli.parse_args()

    print("掃描 Research_zoo 研究筆記...")
    results = scan_research_zoo()
    print_report(results, verbose=options.verbose)

    # A non-zero exit status tells the caller that some note still needs syncing.
    if any(entry.get('status') == 'out_of_sync' for entry in results):
        sys.exit(1)


# Run the check only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
