#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
擷取不動產公告的詳細資料（API 版本）

使用 HTTP API 直接呼叫，不需要 Playwright，速度快約 38 倍。

使用方式：
  python api_fetch_details.py --date 2026-01-08
  python api_fetch_details.py  # 預設昨天
  python api_fetch_details.py --concurrent 5  # 併發 5 個請求
"""

import sys
sys.stdout.reconfigure(encoding='utf-8')

import argparse
import sqlite3
import time
from datetime import datetime, timedelta
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup

BASE_PATH = Path(__file__).parent.parent
DB_PATH = BASE_PATH / 'db' / 'mops_announcements.db'

# 不動產相關類型
REAL_ESTATE_TYPES = [2, 3]

TYPE_NAMES = {
    2: '委建合建',
    3: '不動產設備'
}

# API 對應設定
# Type 1 和 Type 2 共用 ajax_t59sb03，但 step 不同
API_CONFIG = {
    2: {'url': 'https://mopsov.twse.com.tw/mops/web/ajax_t59sb03', 'step': '2b'},
    3: {'url': 'https://mopsov.twse.com.tw/mops/web/ajax_t67sb02', 'step': '2'},
}


def fetch_detail_via_api(ann: dict) -> list:
    """
    透過 API 取得單筆公告的詳細資料

    Args:
        ann: 公告資料 dict，包含 stock_code, announcement_date, skey, type_code 等

    Returns:
        list of dict: [{'name': '欄位名', 'value': '欄位值'}, ...]
    """
    config = API_CONFIG.get(ann['type_code'])
    if not config:
        return []

    # 轉換日期格式 (2026-01-08 -> 20260108)
    date_formatted = ann['announcement_date'].replace('-', '')

    # 計算民國年
    year = int(ann['announcement_date'][:4]) - 1911
    month = ann['announcement_date'][5:7]

    data = {
        'TYPEK': 'all',
        'step': config['step'],
        'firstin': '1',
        'co_id': ann['stock_code'],
        'DATE1': date_formatted,
        'SKEY': str(ann['skey']),
        'YEAR': str(year),
        'MONTH': month,
        'kind': ''
    }

    try:
        response = requests.post(config['url'], data=data, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # 解析表格欄位
        result = []
        seen_keys = set()

        for row in soup.find_all('tr'):
            th = row.find('th')
            td = row.find('td')

            if th and td:
                name = th.get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                name = ' '.join(name.split())  # 正規化空白
                value = td.get_text(strip=True)

                # 過濾無效欄位
                if name and len(name) > 0 and len(name) < 200 and name not in ('欄位', '內容'):
                    # 避免重複欄位
                    if name not in seen_keys:
                        result.append({
                            'name': name,
                            'value': value[:2000] if value else ''
                        })
                        seen_keys.add(name)

        return result

    except Exception as e:
        print(f"    API 錯誤: {e}")
        return []


def process_announcement(ann: dict, db_path: Path) -> tuple:
    """
    處理單筆公告（用於併發處理）

    Returns:
        (ann_id, success, field_count, error_msg)
    """
    details = fetch_detail_via_api(ann)

    if not details:
        return (ann['id'], False, 0, '無資料')

    try:
        # 存入資料庫
        conn = sqlite3.connect(db_path)
        cur = conn.cursor()

        # 插入詳細欄位
        for i, detail in enumerate(details):
            cur.execute('''
                INSERT INTO announcement_details (announcement_id, field_name, field_value, field_order)
                VALUES (?, ?, ?, ?)
            ''', (ann['id'], detail['name'], detail['value'], i + 1))

        # 更新擷取狀態
        cur.execute('''
            UPDATE announcements
            SET detail_fetched = 1, detail_fetched_at = datetime('now', 'localtime')
            WHERE id = ?
        ''', (ann['id'],))

        conn.commit()
        conn.close()

        return (ann['id'], True, len(details), None)

    except Exception as e:
        return (ann['id'], False, 0, str(e))


def main():
    parser = argparse.ArgumentParser(description='擷取不動產公告詳細資料（API 版本）')
    parser.add_argument('--date', help='目標日期 (YYYY-MM-DD)，預設昨天')
    parser.add_argument('--concurrent', type=int, default=3, help='併發請求數（預設 3）')
    parser.add_argument('--delay', type=float, default=0.3, help='請求間隔秒數（預設 0.3）')
    args = parser.parse_args()

    # 決定日期
    if args.date:
        target_date = args.date
    else:
        yesterday = datetime.now() - timedelta(days=1)
        target_date = yesterday.strftime('%Y-%m-%d')

    print('=' * 50)
    print('不動產公告詳細資料擷取（API 版本）')
    print('=' * 50)
    print(f'目標日期: {target_date}')
    print(f'資料庫: {DB_PATH}')
    print(f'併發數: {args.concurrent}')
    print('-' * 50)
    print()

    if not DB_PATH.exists():
        print(f'錯誤: 資料庫不存在 {DB_PATH}')
        return

    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()

    # 查詢待擷取的不動產公告
    cur.execute('''
        SELECT id, stock_code, company_name, announcement_date, skey, type_code, subject, market_type, query_year
        FROM announcements
        WHERE announcement_date = ?
          AND type_code IN (2, 3)
          AND detail_fetched = 0
        ORDER BY type_code, market_type, stock_code
    ''', (target_date,))

    columns = ['id', 'stock_code', 'company_name', 'announcement_date', 'skey', 'type_code', 'subject', 'market_type', 'query_year']
    pending = [dict(zip(columns, row)) for row in cur.fetchall()]
    conn.close()

    print(f'待擷取: {len(pending)} 筆不動產公告')
    print()

    if not pending:
        print('沒有待擷取的不動產公告')
        return

    # 顯示待處理項目
    for ann in pending:
        print(f"  [{ann['id']}] {ann['stock_code']} {ann['company_name']} ({TYPE_NAMES.get(ann['type_code'], '?')})")
    print()

    success_count = 0
    fail_count = 0
    start_time = time.time()

    # 使用併發處理
    if args.concurrent > 1:
        print(f'開始併發擷取（{args.concurrent} 併發）...')
        print()

        with ThreadPoolExecutor(max_workers=args.concurrent) as executor:
            futures = {}

            for i, ann in enumerate(pending):
                future = executor.submit(process_announcement, ann, DB_PATH)
                futures[future] = ann

                # 控制提交速度
                if i < len(pending) - 1:
                    time.sleep(args.delay)

            for future in as_completed(futures):
                ann = futures[future]
                ann_id, success, field_count, error = future.result()

                if success:
                    print(f"  [{ann_id}] {ann['stock_code']} {ann['company_name']} ... {field_count} 欄位")
                    success_count += 1
                else:
                    print(f"  [{ann_id}] {ann['stock_code']} {ann['company_name']} ... 失敗: {error}")
                    fail_count += 1
    else:
        # 序列處理
        print('開始序列擷取...')
        print()

        for ann in pending:
            print(f"  [{ann['id']}] {ann['stock_code']} {ann['company_name']} ({TYPE_NAMES.get(ann['type_code'], '?')})... ", end='')

            ann_id, success, field_count, error = process_announcement(ann, DB_PATH)

            if success:
                print(f"{field_count} 欄位")
                success_count += 1
            else:
                print(f"失敗: {error}")
                fail_count += 1

            time.sleep(args.delay)

    elapsed = time.time() - start_time

    print()
    print('=' * 50)
    print(f'成功: {success_count} 筆')
    print(f'失敗: {fail_count} 筆')
    print(f'耗時: {elapsed:.2f} 秒')
    if success_count > 0:
        print(f'平均: {elapsed / (success_count + fail_count):.3f} 秒/筆')
    print('=' * 50)


if __name__ == '__main__':
    main()
