#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
MOPS 歷史重大訊息 API 爬蟲
==========================

使用 HTTP API 直接查詢公開資訊觀測站的歷史重大訊息（t05st01），
不需要 Playwright。

功能：
- 查詢特定公司的歷史重大訊息
- 查詢特定日期範圍的所有公司重大訊息
- 取得公告詳細內容
- 儲存到 CSV 或資料庫

使用方式：
  # 查詢台積電 114 年 1 月的重大訊息
  python api_historical_announcements.py --company 2330 --year 114 --month 1

  # 查詢所有公司 114 年 1 月 9 日的重大訊息
  python api_historical_announcements.py --year 114 --month 1 --start-day 9 --end-day 9

  # 查詢並取得詳細內容
  python api_historical_announcements.py --company 2330 --year 114 --month 1 --fetch-details

  # 輸出到 CSV
  python api_historical_announcements.py --company 2330 --year 114 --month 1 --output csv

  # 列出可用參數
  python api_historical_announcements.py --help
"""

import sys
# Force UTF-8 on stdout so the Chinese output below prints correctly even
# when the console's default encoding is not UTF-8 (e.g. cp950 on Windows).
# NOTE(review): assumes sys.stdout is a TextIOWrapper with reconfigure()
# (Python 3.7+); under some redirected streams this attribute may be absent.
sys.stdout.reconfigure(encoding='utf-8')

import argparse
import csv
import re
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup

# ===== Configuration =====
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
OUTPUT_DIR = DATA_DIR / "historical"  # default destination for CSV output

# MOPS t05st01 (historical material information) ajax endpoint
API_URL = "https://mopsov.twse.com.tw/mops/web/ajax_t05st01"

# Market-type codes accepted by the API (TYPEK field), mapped to their
# Chinese display labels used when echoing the query conditions.
MARKET_TYPES = {
    'all': '全部',       # all markets
    'sii': '上市',       # TWSE-listed
    'otc': '上櫃',       # TPEx-listed (OTC)
    'rotc': '興櫃',      # emerging stock board
    'pub': '公開發行',   # public offering (unlisted)
}


class HistoricalAnnouncementsFetcher:
    """HTTP API client for MOPS historical material information (t05st01).

    Posts form data directly to the MOPS ajax endpoint and parses the
    returned HTML tables, so no browser automation is required.
    """

    # Patterns that pull the detail-view parameters out of each list row's
    # onclick handler.  Compiled once here instead of once per row.
    _ONCLICK_PATTERNS = {
        'seq_no': re.compile(r"seq_no\.value='(\d+)'"),
        'spoke_date': re.compile(r"spoke_date\.value='(\d+)'"),
        'spoke_time': re.compile(r"spoke_time\.value='(\d+)'"),
    }

    def __init__(self, delay: float = 0.3):
        """
        Args:
            delay: Seconds to sleep before each detail request (default 0.3),
                to avoid hammering the MOPS servers.
        """
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8',
        })

    def _post(self, data: Dict[str, str]) -> 'BeautifulSoup':
        """POST *data* to the t05st01 endpoint and return the parsed HTML.

        Raises on HTTP errors (callers wrap this in their own try/except).
        """
        response = self.session.post(API_URL, data=data, timeout=30)
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')

    @classmethod
    def _parse_onclick(cls, onclick: str) -> Dict[str, Optional[str]]:
        """Extract seq_no / spoke_date / spoke_time from a row's onclick JS.

        Returns a dict with exactly those three keys; a value is None when
        the corresponding field is absent from the handler text.
        """
        params: Dict[str, Optional[str]] = {}
        for key, pattern in cls._ONCLICK_PATTERNS.items():
            match = pattern.search(onclick)
            params[key] = match.group(1) if match else None
        return params

    def _parse_row(self, row) -> Optional[Dict[str, Any]]:
        """Turn one <tr> of the list table into a record; None for non-data rows."""
        cells = row.find_all('td')
        if len(cells) < 5:
            return None

        record: Dict[str, Any] = {
            'stock_code': cells[0].get_text(strip=True),
            'company_name': cells[1].get_text(strip=True),
            'date': cells[2].get_text(strip=True),
            'time': cells[3].get_text(strip=True),
            'subject': cells[4].get_text(strip=True),
            'seq_no': None,
            'spoke_date': None,
            'spoke_time': None,
        }

        # The detail-view parameters are embedded in the row button's onclick.
        button = row.find('input', {'type': 'button'})
        if button and button.get('onclick'):
            record.update(self._parse_onclick(button['onclick']))

        return record

    def fetch_list(
        self,
        year: int,
        month: int,
        company: str = '',
        start_day: int = 1,
        end_day: int = 31,
        market: str = 'all'
    ) -> List[Dict[str, Any]]:
        """Fetch the announcement list for a company and date window.

        Args:
            year: ROC year (e.g. 114).
            month: Month (1-12).
            company: Stock code or short name (empty string = all companies).
            start_day: First day of the window (1-31).
            end_day: Last day of the window (1-31).
            market: Market type (all/sii/otc/rotc/pub).

        Returns:
            List of dicts with keys stock_code, company_name, date, time,
            subject, seq_no, spoke_date, spoke_time.  The last three are
            None when the row exposes no detail-view button.  An empty
            list is returned on any failure.
        """
        data = {
            'encodeURIComponent': '1',
            'step': '1',  # step 1 = list query
            'firstin': '1',
            'off': '1',
            'TYPEK': market,
            'co_id': company,
            'year': str(year),
            'month': str(month).zfill(2),
            'b_date': str(start_day).zfill(2),
            'e_date': str(end_day).zfill(2),
        }

        try:
            soup = self._post(data)

            table = soup.find('table', class_='hasBorder')
            if not table:
                # No result table means the query matched nothing.
                return []

            announcements = []
            for row in table.find_all('tr')[1:]:  # first row is the header
                record = self._parse_row(row)
                if record is not None:
                    announcements.append(record)
            return announcements

        except Exception as e:
            # Deliberately broad: a failed scrape degrades to an empty
            # result instead of aborting a larger batch run.
            print(f"取得列表失敗: {e}")
            return []

    def fetch_detail(
        self,
        company: str,
        year: int,
        month: int,
        seq_no: str,
        spoke_date: str,
        spoke_time: str
    ) -> Dict[str, Any]:
        """Fetch the detail view of a single announcement.

        Args:
            company: Stock code.
            year: ROC year.
            month: Month.
            seq_no: Sequence number taken from the list row.
            spoke_date: Announcement date (YYYYMMDD).
            spoke_time: Announcement time (HHMMSS).

        Returns:
            Mapping of field label -> value; empty dict on failure.
        """
        data = {
            'step': '2',  # step 2 = detail query
            'firstin': '1',
            'off': '1',
            'TYPEK': 'all',
            'co_id': company,
            'year': str(year),
            'month': str(month).zfill(2),
            'seq_no': seq_no,
            'spoke_date': spoke_date,
            'spoke_time': spoke_time,
        }

        try:
            soup = self._post(data)

            detail: Dict[str, Any] = {}
            for table in soup.find_all('table', class_='hasBorder'):
                for row in table.find_all('tr'):
                    cells = row.find_all(['th', 'td'])
                    if len(cells) >= 2:
                        key = cells[0].get_text(strip=True)
                        value = cells[1].get_text(strip=True)
                        # Skip the header cells of the field/value table.
                        if key and key not in ('欄位', '內容'):
                            detail[key] = value
            return detail

        except Exception as e:
            # Best-effort: a failed detail fetch yields an empty dict.
            print(f"取得詳細資料失敗: {e}")
            return {}

    def fetch_with_details(
        self,
        year: int,
        month: int,
        company: str = '',
        start_day: int = 1,
        end_day: int = 31,
        market: str = 'all',
        concurrent: int = 3
    ) -> List[Dict[str, Any]]:
        """Fetch the announcement list and attach each item's detail view.

        Args:
            year: ROC year.
            month: Month.
            company: Stock code (empty string = all companies).
            start_day: First day of the window.
            end_day: Last day of the window.
            market: Market type.
            concurrent: Number of worker threads for detail requests.

        Returns:
            List records, each augmented with a 'detail' dict when the row
            exposed detail-view parameters.  Order follows completion when
            concurrent > 1.
        """
        announcements = self.fetch_list(year, month, company, start_day, end_day, market)
        if not announcements:
            return []

        print(f"找到 {len(announcements)} 筆公告，開始取得詳細內容...")
        total = len(announcements)

        def fetch_one(ann):
            # Rows without detail-view parameters are passed through as-is.
            if not all([ann.get('seq_no'), ann.get('spoke_date'), ann.get('spoke_time')]):
                return ann
            time.sleep(self.delay)  # throttle each worker
            ann['detail'] = self.fetch_detail(
                ann['stock_code'],
                year,
                month,
                ann['seq_no'],
                ann['spoke_date'],
                ann['spoke_time']
            )
            return ann

        results: List[Dict[str, Any]] = []
        if concurrent > 1:
            with ThreadPoolExecutor(max_workers=concurrent) as executor:
                futures = [executor.submit(fetch_one, ann) for ann in announcements]
                for i, future in enumerate(as_completed(futures)):
                    result = future.result()
                    results.append(result)
                    print(f"  [{i+1}/{total}] {result['stock_code']} {result['company_name']}")
        else:
            for i, ann in enumerate(announcements):
                result = fetch_one(ann)
                results.append(result)
                print(f"  [{i+1}/{total}] {result['stock_code']} {result['company_name']}")
        return results


def save_to_csv(announcements: List[Dict[str, Any]], output_path: Path):
    """Write announcement records to a CSV file at *output_path*.

    The base columns are the list fields; when any record carries a
    'detail' dict, the union of all detail keys is appended as extra
    columns (sorted for a stable header).

    Args:
        announcements: Records as produced by the fetcher; each may carry
            an optional 'detail' dict of extra field label -> value pairs.
        output_path: Target CSV path; parent directories are created.
    """
    if not announcements:
        print("沒有資料可儲存")
        return

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Base columns; seq_no/spoke_date/spoke_time are internal parameters
    # and are dropped via extrasaction='ignore' below.
    fieldnames = ['stock_code', 'company_name', 'date', 'time', 'subject']

    # Collect detail columns from ALL records.  (Bug fix: previously only
    # the first record was inspected, so every detail column was silently
    # dropped whenever the first record happened to have no detail.)
    detail_keys = set()
    for ann in announcements:
        if ann.get('detail'):
            detail_keys.update(ann['detail'].keys())
    fieldnames.extend(sorted(detail_keys))

    # utf-8-sig writes a BOM so spreadsheet apps detect the encoding.
    with open(output_path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()

        for ann in announcements:
            row = {k: v for k, v in ann.items() if k != 'detail'}
            if ann.get('detail'):
                row.update(ann['detail'])
            writer.writerow(row)

    print(f"已儲存到: {output_path}")


def main():
    """CLI entry point: parse arguments, query MOPS, then print or save CSV."""
    parser = argparse.ArgumentParser(
        description='MOPS 歷史重大訊息 API 爬蟲',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
範例：
  # 查詢台積電 114 年 1 月的重大訊息
  python api_historical_announcements.py --company 2330 --year 114 --month 1

  # 查詢所有公司 114 年 1 月 9 日的重大訊息
  python api_historical_announcements.py --year 114 --month 1 --start-day 9 --end-day 9

  # 查詢並取得詳細內容，輸出到 CSV
  python api_historical_announcements.py --company 2330 --year 114 --month 1 --fetch-details --output csv
        """
    )

    parser.add_argument('--company', '-c', default='', help='公司代號或簡稱（不指定=全部公司）')
    parser.add_argument('--year', '-y', type=int, required=True, help='民國年（如 114）')
    parser.add_argument('--month', '-m', type=int, required=True, help='月份（1-12）')
    parser.add_argument('--start-day', type=int, default=1, help='起始日（預設 1）')
    parser.add_argument('--end-day', type=int, default=31, help='結束日（預設 31）')
    parser.add_argument('--market', choices=MARKET_TYPES.keys(), default='all', help='市場類型')
    parser.add_argument('--fetch-details', '-d', action='store_true', help='取得詳細內容')
    parser.add_argument('--concurrent', type=int, default=3, help='併發數（預設 3）')
    parser.add_argument('--delay', type=float, default=0.3, help='請求間隔秒數（預設 0.3）')
    parser.add_argument('--output', '-o', choices=['print', 'csv'], default='print', help='輸出格式')
    parser.add_argument('--output-file', help='輸出檔案路徑（預設自動產生）')

    args = parser.parse_args()

    # Echo the query conditions so batch-run logs are self-describing.
    print('=' * 60)
    print('MOPS 歷史重大訊息 API 爬蟲')
    print('=' * 60)
    print("查詢條件:")
    print(f"  公司: {args.company or '全部'}")
    print(f"  期間: 民國 {args.year} 年 {args.month} 月 {args.start_day} 日 ~ {args.end_day} 日")
    print(f"  市場: {MARKET_TYPES.get(args.market, args.market)}")
    print(f"  取得詳細: {'是' if args.fetch_details else '否'}")
    print('-' * 60)
    print()

    fetcher = HistoricalAnnouncementsFetcher(delay=args.delay)

    # Fetch the data, with or without per-announcement detail views.
    if args.fetch_details:
        announcements = fetcher.fetch_with_details(
            year=args.year,
            month=args.month,
            company=args.company,
            start_day=args.start_day,
            end_day=args.end_day,
            market=args.market,
            concurrent=args.concurrent
        )
    else:
        announcements = fetcher.fetch_list(
            year=args.year,
            month=args.month,
            company=args.company,
            start_day=args.start_day,
            end_day=args.end_day,
            market=args.market
        )

    print()
    print(f"共取得 {len(announcements)} 筆公告")
    print()

    # Output: CSV file or a console preview.
    if args.output == 'csv':
        if args.output_file:
            output_path = Path(args.output_file)
        else:
            # Auto-generate a name like announcements_2330_11401.csv
            company_str = args.company or 'all'
            date_str = f"{args.year}{args.month:02d}"
            output_path = OUTPUT_DIR / f"announcements_{company_str}_{date_str}.csv"

        save_to_csv(announcements, output_path)

    else:  # print
        for ann in announcements[:20]:  # show at most 20 records
            print(f"[{ann['stock_code']}] {ann['company_name']}")
            print(f"  {ann['date']} {ann['time']}")
            # Bug fix: only append an ellipsis when the subject was actually
            # truncated (it used to be appended unconditionally).
            subject = ann['subject']
            print(f"  {subject[:60]}{'...' if len(subject) > 60 else ''}")
            if ann.get('detail'):
                print(f"  (詳細: {len(ann['detail'])} 欄位)")
            print()

        if len(announcements) > 20:
            print(f"... 還有 {len(announcements) - 20} 筆未顯示")

    print()
    print('=' * 60)
    print('完成')
    print('=' * 60)


if __name__ == '__main__':
    main()
