#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
中央氣象署資料收集器
定期收集觀測資料、地震報告、警特報並儲存為 CSV

用法：
    python collector.py                 # 收集一次（所有資料類型）
    python collector.py --observation   # 只收集觀測資料
    python collector.py --earthquake    # 只收集地震報告
    python collector.py --alert         # 只收集警特報
    python collector.py --daemon        # 背景執行，每 3 小時收集一次

建議排程：
    - 觀測資料：每 3 小時（資料更新頻率約 10 分鐘，但不需太頻繁收集）
    - 地震報告：每 1 小時（即時性較重要）
    - 警特報：每 1 小時

儲存位置：
    data/observations/YYYY-MM/observations_YYYYMMDD.csv
    data/earthquakes/YYYY/earthquakes_YYYY.csv
    data/alerts/YYYY/alerts_YYYY.csv
"""

import sys
import os
import csv
import json
import time
import argparse
import requests
from datetime import datetime
from pathlib import Path

# Windows 編碼處理
sys.stdout.reconfigure(encoding='utf-8')

# API 基礎 URL
BASE_URL = 'https://opendata.cwa.gov.tw/api/v1/rest/datastore'

# API 端點
APIS = {
    'observation': 'O-A0001-001',  # 自動氣象站觀測資料
    'earthquake': 'E-A0016-002',   # 有感地震報告
    'alert': 'W-C0034-001',        # 天氣警特報
}

# 資料儲存目錄
DATA_DIR = Path(__file__).parent / 'data'


def get_api_key():
    """取得 API Key"""
    api_key = os.environ.get('CWA_API_KEY')

    if not api_key and sys.platform == 'win32':
        import subprocess
        try:
            result = subprocess.run(
                ['powershell', '-Command',
                 "[Environment]::GetEnvironmentVariable('CWA_API_KEY', 'User')"],
                capture_output=True, text=True, timeout=5
            )
            api_key = result.stdout.strip()
        except Exception:
            pass

    if not api_key:
        print('錯誤：未設定環境變數 CWA_API_KEY')
        sys.exit(1)
    return api_key


def fetch_api(api_code, params=None):
    """通用 API 請求函數"""
    api_key = get_api_key()
    url = f'{BASE_URL}/{api_code}'

    request_params = {'Authorization': api_key}
    if params:
        request_params.update(params)

    try:
        response = requests.get(url, params=request_params, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f'[錯誤] API 請求失敗（{api_code}）：{e}')
        return None


def ensure_dir(dir_path):
    """確保目錄存在"""
    Path(dir_path).mkdir(parents=True, exist_ok=True)


def collect_observation():
    """收集觀測資料"""
    now = datetime.now()
    data = fetch_api(APIS['observation'])

    if not data or data.get('success') != 'true':
        print(f'[{now}] 觀測資料收集失敗')
        return False

    # 準備儲存路徑
    month_dir = DATA_DIR / 'observations' / now.strftime('%Y-%m')
    ensure_dir(month_dir)
    csv_file = month_dir / f"observations_{now.strftime('%Y%m%d')}.csv"

    # 解析資料
    records = []
    for station in data['records']['Station']:
        weather = station.get('WeatherElement', {})
        geo = station.get('GeoInfo', {})
        obs_time = station.get('ObsTime', {}).get('DateTime', '')

        records.append({
            'collect_time': now.isoformat(),
            'obs_time': obs_time,
            'station_id': station.get('StationId', ''),
            'station_name': station.get('StationName', ''),
            'county': geo.get('CountyName', ''),
            'town': geo.get('TownName', ''),
            'lat': geo.get('Coordinates', [{}])[0].get('CoordinatesValue', ''),
            'lon': geo.get('Coordinates', [{}])[1].get('CoordinatesValue', '') if len(geo.get('Coordinates', [])) > 1 else '',
            'temperature': weather.get('AirTemperature', ''),
            'humidity': weather.get('RelativeHumidity', ''),
            'wind_speed': weather.get('WindSpeed', ''),
            'wind_dir': weather.get('WindDirection', ''),
            'pressure': weather.get('AirPressure', ''),
            'precipitation': weather.get('Now', {}).get('Precipitation', ''),
            'sunshine': weather.get('SunshineDuration', ''),
            'uv_index': weather.get('UVIndex', ''),
        })

    # 寫入 CSV（append 模式）
    file_exists = csv_file.exists()
    with open(csv_file, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=records[0].keys())
        if not file_exists:
            writer.writeheader()
        writer.writerows(records)

    print(f'[{now}] 觀測資料：{len(records)} 筆 → {csv_file}')
    return True


def collect_earthquake():
    """收集地震報告"""
    now = datetime.now()
    data = fetch_api(APIS['earthquake'], {'limit': 20})

    if not data or data.get('success') != 'true':
        print(f'[{now}] 地震報告收集失敗')
        return False

    # 準備儲存路徑
    year_dir = DATA_DIR / 'earthquakes' / now.strftime('%Y')
    ensure_dir(year_dir)
    csv_file = year_dir / f"earthquakes_{now.strftime('%Y')}.csv"

    # 讀取已存在的地震編號（避免重複）
    existing_ids = set()
    if csv_file.exists():
        with open(csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                existing_ids.add(row.get('earthquake_no', ''))

    # 解析資料
    new_records = []
    for eq in data.get('records', {}).get('Earthquake', []):
        eq_no = str(eq.get('EarthquakeNo', ''))
        if eq_no in existing_ids:
            continue  # 跳過已存在的記錄

        info = eq.get('EarthquakeInfo', {})
        epicenter = info.get('Epicenter', {})
        magnitude = info.get('EarthquakeMagnitude', {})

        new_records.append({
            'collect_time': now.isoformat(),
            'earthquake_no': eq_no,
            'origin_time': info.get('OriginTime', ''),
            'location': epicenter.get('Location', ''),
            'lat': epicenter.get('EpicenterLatitude', ''),
            'lon': epicenter.get('EpicenterLongitude', ''),
            'depth': info.get('FocalDepth', ''),
            'magnitude': magnitude.get('MagnitudeValue', ''),
            'magnitude_type': magnitude.get('MagnitudeType', ''),
            'report_content': eq.get('ReportContent', ''),
            'report_image': eq.get('ReportImageURI', ''),
            'web_url': eq.get('Web', ''),
        })

    if new_records:
        file_exists = csv_file.exists()
        with open(csv_file, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_records[0].keys())
            if not file_exists:
                writer.writeheader()
            writer.writerows(new_records)
        print(f'[{now}] 地震報告：{len(new_records)} 筆新增 → {csv_file}')
    else:
        print(f'[{now}] 地震報告：無新資料')

    return True


def collect_alert():
    """收集警特報"""
    now = datetime.now()
    data = fetch_api(APIS['alert'])

    if not data or data.get('success') != 'true':
        print(f'[{now}] 警特報收集失敗')
        return False

    # 準備儲存路徑
    year_dir = DATA_DIR / 'alerts' / now.strftime('%Y')
    ensure_dir(year_dir)
    csv_file = year_dir / f"alerts_{now.strftime('%Y')}.csv"

    # 讀取已存在的記錄 ID
    existing_ids = set()
    if csv_file.exists():
        with open(csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                # 使用 headline + start_time 作為唯一識別
                key = f"{row.get('headline', '')}_{row.get('start_time', '')}"
                existing_ids.add(key)

    # 解析資料
    new_records = []
    for record in data.get('records', {}).get('record', []):
        headline = record.get('datasetInfo', {}).get('datasetDescription', '')
        valid_time = record.get('validTime', {})
        start_time = valid_time.get('startTime', '')

        record_key = f"{headline}_{start_time}"
        if record_key in existing_ids:
            continue

        hazard = record.get('hazardConditions', {}).get('hazards', {}).get('hazard', {})
        affected = record.get('affectedAreas', {}).get('location', [])

        if isinstance(affected, list):
            areas = '|'.join([a.get('locationName', '') for a in affected])
        else:
            areas = affected.get('locationName', '')

        new_records.append({
            'collect_time': now.isoformat(),
            'headline': headline,
            'hazard_type': hazard.get('info', {}).get('phenomena', '') if isinstance(hazard, dict) else '',
            'significance': hazard.get('info', {}).get('significance', '') if isinstance(hazard, dict) else '',
            'start_time': start_time,
            'end_time': valid_time.get('endTime', ''),
            'affected_areas': areas,
            'content': record.get('contents', {}).get('content', {}).get('contentText', ''),
        })

    if new_records:
        file_exists = csv_file.exists()
        with open(csv_file, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_records[0].keys())
            if not file_exists:
                writer.writeheader()
            writer.writerows(new_records)
        print(f'[{now}] 警特報：{len(new_records)} 筆新增 → {csv_file}')
    else:
        print(f'[{now}] 警特報：無新資料')

    return True


def collect_all():
    """收集所有資料"""
    print(f"\n{'='*60}")
    print(f"  資料收集開始 - {datetime.now()}")
    print(f"{'='*60}\n")

    results = {
        '觀測資料': collect_observation(),
        '地震報告': collect_earthquake(),
        '警特報': collect_alert(),
    }

    print(f"\n{'='*60}")
    print("  收集結果：")
    for name, success in results.items():
        status = '成功' if success else '失敗'
        print(f"    {name}：{status}")
    print(f"{'='*60}\n")

    return all(results.values())


def daemon_mode(interval_hours=3):
    """背景執行模式"""
    interval_seconds = interval_hours * 3600
    print(f"啟動背景收集模式，每 {interval_hours} 小時執行一次")
    print("按 Ctrl+C 停止")
    print()

    while True:
        try:
            collect_all()
            print(f"下次收集時間：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} + {interval_hours} 小時")
            time.sleep(interval_seconds)
        except KeyboardInterrupt:
            print("\n收集已停止")
            break


def main():
    parser = argparse.ArgumentParser(
        description='中央氣象署資料收集器',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
範例：
  python collector.py                 收集所有資料
  python collector.py --observation   只收集觀測資料
  python collector.py --earthquake    只收集地震報告
  python collector.py --alert         只收集警特報
  python collector.py --daemon        背景執行（每 3 小時）
  python collector.py --daemon --interval 1  每 1 小時執行
        '''
    )
    parser.add_argument('--observation', '-o', action='store_true', help='收集觀測資料')
    parser.add_argument('--earthquake', '-e', action='store_true', help='收集地震報告')
    parser.add_argument('--alert', '-a', action='store_true', help='收集警特報')
    parser.add_argument('--daemon', '-d', action='store_true', help='背景執行模式')
    parser.add_argument('--interval', '-i', type=int, default=3, help='收集間隔（小時，預設 3）')

    args = parser.parse_args()

    # 背景模式
    if args.daemon:
        daemon_mode(args.interval)
        return

    # 選擇性收集
    if args.observation:
        collect_observation()
    elif args.earthquake:
        collect_earthquake()
    elif args.alert:
        collect_alert()
    else:
        # 預設收集全部
        collect_all()


if __name__ == '__main__':
    main()
