"""
MOPS 公司基本資料爬蟲
使用 Playwright 抓取公開資訊觀測站的公司基本資料
"""
import sys
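# Force UTF-8 stdout so Chinese field names print correctly regardless of the console's default encoding.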
sys.stdout.reconfigure(encoding='utf-8')

import json
import os
from datetime import datetime
from pathlib import Path

# Playwright script template (rendered with str.format; literal JS braces are escaped as {{ }})
PLAYWRIGHT_SCRIPT = '''
const {{ chromium }} = require('playwright');

const TARGET_URL = 'https://mopsov.twse.com.tw/mops/web/t05st03';
const COMPANY_CODE = '{company_code}';

(async () => {{
  const browser = await chromium.launch({{ headless: true }});
  const page = await browser.newPage();

  try {{
    await page.goto(TARGET_URL, {{ waitUntil: 'networkidle', timeout: 30000 }});
    await page.fill('input#co_id', COMPANY_CODE);
    await page.click('input[value=" 查詢 "]');
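    // Give the result table time to render; the page exposes no reliable "query finished" signal, so a fixed wait is used.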
    await page.waitForTimeout(3000);

    // Extract table rows from the result area
    const rawData = await page.evaluate(() => {{
      const resultDiv = document.getElementById('table01');
      if (!resultDiv) return {{ error: 'no_result_div' }};

      const tables = resultDiv.querySelectorAll('table');
      const allRows = [];

      tables.forEach((table, tIdx) => {{
        const rows = table.querySelectorAll('tr');
        rows.forEach(row => {{
          const cells = row.querySelectorAll('td, th');
          const rowData = [];
          cells.forEach(cell => {{
            const text = cell.innerText.trim().replace(/\\s+/g, ' ');
            if (text) rowData.push(text);
          }});
          if (rowData.length > 0) {{
            allRows.push({{ table: tIdx, row: rowData }});
          }}
        }});
      }});

      return {{ rows: allRows }};
    }});

    // Flatten rows into key-value pairs (cells alternate between label and value)
    const result = {{}};
    if (rawData.rows) {{
      rawData.rows.forEach(({{ row }}) => {{
        for (let i = 0; i < row.length - 1; i += 2) {{
          const key = row[i];
          const val = row[i + 1];
          if (key && val && !result[key]) {{
            if (val !== '－' && val !== '-' && key.length < 50) {{
              result[key] = val;
            }}
          }}
        }}
      }});
    }}

    console.log(JSON.stringify(result, null, 2));

  }} catch (error) {{
    console.error(JSON.stringify({{ error: error.message }}));
  }} finally {{
    await browser.close();
  }}
}})();
'''

def get_project_dir():
    """取得專案目錄"""
    return Path(__file__).parent

def get_data_dir():
    """取得資料儲存目錄"""
    data_dir = get_project_dir() / "data"
    data_dir.mkdir(exist_ok=True)
    return data_dir

def scrape_company(company_code: str) -> dict:
    """
    抓取公司基本資料

    Args:
        company_code: 公司代碼，如 "1101"

    Returns:
        包含公司資料和時間戳的字典
    """
    import subprocess
    import tempfile

    # Render the Playwright script into a temporary file
    script_content = PLAYWRIGHT_SCRIPT.format(company_code=company_code)

    with tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False, encoding='utf-8') as f:
        f.write(script_content)
        script_path = f.name

    try:
        # Run the Playwright script via the playwright-skill runner (path assumes the standard plugin install location)
        skill_dir = Path.home() / ".claude" / "plugins" / "marketplaces" / "playwright-skill" / "skills" / "playwright-skill"

        result = subprocess.run(
            ['node', 'run.js', script_path],
            cwd=skill_dir,
            capture_output=True,
            text=True,
            timeout=120
        )

        # Parse the output, skipping the Playwright Skill banner lines that precede the JSON
        output_lines = result.stdout.strip().split('\n')
        json_lines = []
        in_json = False

        for line in output_lines:
            if line.strip().startswith('{'):
                in_json = True
            if in_json:
                json_lines.append(line)

        if json_lines:
            raw_data = json.loads('\n'.join(json_lines))
        else:
            raw_data = {}

        # Attach a fetch timestamp
        return {
            "company_code": company_code,
            "fetched_at": datetime.now().isoformat(),
            "data": raw_data
        }

    finally:
        os.unlink(script_path)

def save_snapshot(company_code: str, data: dict, has_changes: bool = True, is_first: bool = False):
    """
    儲存資料快照（混合儲存策略）

    Args:
        company_code: 公司代碼
        data: 完整公司資料
        has_changes: 是否有變更
        is_first: 是否為首次抓取
    """
    data_dir = get_data_dir()

    # Save the latest snapshot (always the full record)
    latest_file = data_dir / f"{company_code}_latest.json"
    with open(latest_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    # Append to the history file (hybrid strategy)
    history_file = data_dir / f"{company_code}_history.json"
    history = []
    if history_file.exists():
        with open(history_file, 'r', encoding='utf-8') as f:
            history = json.load(f)

    # Choose the record format depending on whether anything changed
    if is_first or has_changes:
        # First fetch or changes detected: store the full data
        record = {
            "company_code": company_code,
            "fetched_at": data["fetched_at"],
            "record_type": "full",
            "data": data["data"]
        }
    else:
        # No changes: store only the timestamp
        record = {
            "company_code": company_code,
            "fetched_at": data["fetched_at"],
            "record_type": "no_change"
        }

    history.append(record)

    # Keep only the most recent 50 records
    if len(history) > 50:
        history = history[-50:]

    with open(history_file, 'w', encoding='utf-8') as f:
        json.dump(history, f, ensure_ascii=False, indent=2)

    # Update the company index
    update_company_index(company_code, data)

    return latest_file


def update_company_index(company_code: str, data: dict):
    """更新公司總表"""
    data_dir = get_data_dir()
    index_file = data_dir / "index.json"

    index = {}
    if index_file.exists():
        with open(index_file, 'r', encoding='utf-8') as f:
            index = json.load(f)

    # Update this company's index entry
    company_data = data.get("data", {})
    index[company_code] = {
        "name": company_data.get("公司名稱", ""),
        "industry": company_data.get("產業類別", ""),
        "last_fetched": data["fetched_at"],
        "fetch_count": index.get(company_code, {}).get("fetch_count", 0) + 1
    }

    with open(index_file, 'w', encoding='utf-8') as f:
        json.dump(index, f, ensure_ascii=False, indent=2)


def get_company_index() -> dict:
    """取得公司總表"""
    index_file = get_data_dir() / "index.json"
    if index_file.exists():
        with open(index_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    return {}

def compare_data(old_data: dict, new_data: dict) -> list:
    """比較新舊資料，找出變更"""
    changes = []

    old = old_data.get('data', {})
    new = new_data.get('data', {})

    # Fields flagged as important when they change
    important_fields = [
        '董事長', '總經理', '發言人', '代理發言人',
        '簽證會計師1', '簽證會計師2', '簽證會計師事務所'
    ]

    all_keys = set(old.keys()) | set(new.keys())

    for key in all_keys:
        old_val = old.get(key, '')
        new_val = new.get(key, '')

        if old_val != new_val:
            is_important = key in important_fields
            changes.append({
                'field': key,
                'old_value': old_val,
                'new_value': new_val,
                'is_important': is_important
            })

    return changes

def get_latest(company_code: str) -> dict | None:
    """取得最新的儲存資料"""
    latest_file = get_data_dir() / f"{company_code}_latest.json"
    if latest_file.exists():
        with open(latest_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    return None

def format_age(fetched_at: str) -> str:
    """格式化資料年齡"""
    fetched_time = datetime.fromisoformat(fetched_at)
    age = datetime.now() - fetched_time

    if age.days > 0:
        return f"{age.days} days ago"
    elif age.seconds > 3600:
        return f"{age.seconds // 3600} hours ago"
    elif age.seconds > 60:
        return f"{age.seconds // 60} minutes ago"
    else:
        return "just now"

def show_index():
    """顯示已抓取公司總表"""
    index = get_company_index()
    if not index:
        print("尚未抓取任何公司資料")
        return

    print(f"\n=== 已抓取公司總表（共 {len(index)} 間）===\n")
    print(f"{'代碼':<8} {'公司名稱':<20} {'產業類別':<12} {'抓取次數':<8} {'最後更新'}")
    print("-" * 80)

    for code, info in sorted(index.items()):
        name = info.get('name', '')[:18]
        industry = info.get('industry', '')[:10]
        count = info.get('fetch_count', 0)
        last = info.get('last_fetched', '')[:10]
        print(f"{code:<8} {name:<20} {industry:<12} {count:<8} {last}")


def main():
    import argparse

    parser = argparse.ArgumentParser(description='MOPS company profile tracker')
    parser.add_argument('company_code', nargs='?', help='company code, e.g. 1101')
    parser.add_argument('--refresh', '-r', action='store_true', help='force a fresh scrape')
    parser.add_argument('--json', '-j', action='store_true', help='output as JSON')
    parser.add_argument('--index', '-i', action='store_true', help='show the index of fetched companies')

    args = parser.parse_args()

    # Show the company index
    if args.index:
        show_index()
        return

    if not args.company_code:
        parser.print_help()
        return

    # Check for existing cached data
    existing = get_latest(args.company_code)
    is_first = existing is None

    if existing and not args.refresh:
        age_str = format_age(existing['fetched_at'])
        print(f"現有資料（{age_str}更新）")

        if args.json:
            print(json.dumps(existing, ensure_ascii=False, indent=2))
        else:
            # Show key fields
            data = existing.get('data', {})
            print(f"\nCompany code: {args.company_code}")
            print(f"Company name: {data.get('公司名稱', 'N/A')}")
            print(f"Chairman: {data.get('董事長', 'N/A')}")
            print(f"General manager: {data.get('總經理', 'N/A')}")
            print(f"Spokesperson: {data.get('發言人', 'N/A')}")
            print(f"Certifying CPA 1: {data.get('簽證會計師1', 'N/A')}")
            print(f"Certifying CPA 2: {data.get('簽證會計師2', 'N/A')}")
            print(f"\nFetched at: {existing['fetched_at']}")
            print("\nUse --refresh or -r to re-fetch the data")
        return

    # Fetch fresh data
    print(f"Fetching basic company data for {args.company_code}...")
    new_data = scrape_company(args.company_code)

    # Diff against the previous snapshot
    has_changes = True
    if existing:
        changes = compare_data(existing, new_data)
        has_changes = len(changes) > 0
        if changes:
            print("\n=== 偵測到變更 ===")
            for change in changes:
                marker = "[重要]" if change['is_important'] else ""
                print(f"{marker} {change['field']}: {change['old_value']} -> {change['new_value']}")
        else:
            print("\n資料無變更（僅記錄時間戳）")

    # Save (hybrid storage strategy)
    save_path = save_snapshot(args.company_code, new_data, has_changes=has_changes, is_first=is_first)
    print(f"\n資料已儲存至: {save_path}")

    if args.json:
        print(json.dumps(new_data, ensure_ascii=False, indent=2))
    else:
        data = new_data.get('data', {})
        print(f"\n=== 公司基本資料 ===")
        print(f"公司代碼: {args.company_code}")
        print(f"公司名稱: {data.get('公司名稱', 'N/A')}")
        print(f"董事長: {data.get('董事長', 'N/A')}")
        print(f"總經理: {data.get('總經理', 'N/A')}")
        print(f"發言人: {data.get('發言人', 'N/A')}")
        print(f"簽證會計師1: {data.get('簽證會計師1', 'N/A')}")
        print(f"簽證會計師2: {data.get('簽證會計師2', 'N/A')}")

if __name__ == '__main__':
    main()
