"""
建立 SQLite 資料庫
將 JSON 資料匯入 SQLite，方便複雜查詢與統計分析
"""
import sys
sys.stdout.reconfigure(encoding='utf-8')

import json
import sqlite3
from pathlib import Path
from datetime import datetime

def get_data_dir():
    return Path(__file__).parent / "data"

def create_database():
    db_path = get_data_dir() / "companies.db"

    # 刪除舊資料庫
    if db_path.exists():
        db_path.unlink()

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # 建立公司資料表
    cursor.execute('''
        CREATE TABLE companies (
            code TEXT PRIMARY KEY,
            name TEXT,
            name_en TEXT,
            short_name_en TEXT,
            industry TEXT,
            chairman TEXT,
            ceo TEXT,
            spokesperson TEXT,
            deputy_spokesperson TEXT,
            accounting_firm TEXT,
            accountant1 TEXT,
            accountant2 TEXT,
            address TEXT,
            phone TEXT,
            fax TEXT,
            website TEXT,
            capital TEXT,
            listing_date TEXT,
            otc_date TEXT,
            emerging_date TEXT,
            public_date TEXT,
            tax_id TEXT,
            established_date TEXT,
            last_fetched TEXT,
            raw_data TEXT
        )
    ''')

    # 建立歷史記錄表
    cursor.execute('''
        CREATE TABLE company_history (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            code TEXT,
            fetched_at TEXT,
            record_type TEXT,
            chairman TEXT,
            ceo TEXT,
            spokesperson TEXT,
            deputy_spokesperson TEXT,
            accounting_firm TEXT,
            accountant1 TEXT,
            accountant2 TEXT,
            raw_data TEXT,
            FOREIGN KEY (code) REFERENCES companies(code)
        )
    ''')

    # 建立索引
    cursor.execute('CREATE INDEX idx_history_code ON company_history(code)')
    cursor.execute('CREATE INDEX idx_history_fetched ON company_history(fetched_at)')
    cursor.execute('CREATE INDEX idx_companies_industry ON companies(industry)')
    cursor.execute('CREATE INDEX idx_companies_firm ON companies(accounting_firm)')

    conn.commit()
    return conn

def import_company(cursor, code: str, data_dir: Path):
    """匯入單一公司資料"""
    latest_file = data_dir / f"{code}_latest.json"
    history_file = data_dir / f"{code}_history.json"

    if not latest_file.exists():
        return False

    # 讀取最新資料
    with open(latest_file, 'r', encoding='utf-8') as f:
        latest = json.load(f)

    company_data = latest.get('data', {})

    # 插入公司資料
    cursor.execute('''
        INSERT OR REPLACE INTO companies (
            code, name, name_en, short_name_en, industry,
            chairman, ceo, spokesperson, deputy_spokesperson,
            accounting_firm, accountant1, accountant2,
            address, phone, fax, website, capital,
            listing_date, otc_date, emerging_date, public_date,
            tax_id, established_date, last_fetched, raw_data
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        code,
        company_data.get('公司名稱', ''),
        company_data.get('英文全名', ''),
        company_data.get('英文簡稱', ''),
        company_data.get('產業類別', ''),
        company_data.get('董事長', ''),
        company_data.get('總經理', ''),
        company_data.get('發言人', ''),
        company_data.get('代理發言人', ''),
        company_data.get('簽證會計師事務所', ''),
        company_data.get('簽證會計師1', ''),
        company_data.get('簽證會計師2', ''),
        company_data.get('地址', ''),
        company_data.get('總機', ''),
        company_data.get('傳真機號碼', ''),
        company_data.get('公司網址', ''),
        company_data.get('實收資本額', ''),
        company_data.get('上市日期', ''),
        company_data.get('上櫃日期', ''),
        company_data.get('興櫃日期', ''),
        company_data.get('公開發行日期', ''),
        company_data.get('營利事業統一編號', ''),
        company_data.get('公司成立日期', ''),
        latest.get('fetched_at', ''),
        json.dumps(company_data, ensure_ascii=False)
    ))

    # 匯入歷史記錄
    if history_file.exists():
        with open(history_file, 'r', encoding='utf-8') as f:
            history = json.load(f)

        for record in history:
            record_data = record.get('data', {})
            cursor.execute('''
                INSERT INTO company_history (
                    code, fetched_at, record_type,
                    chairman, ceo, spokesperson, deputy_spokesperson,
                    accounting_firm, accountant1, accountant2, raw_data
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                code,
                record.get('fetched_at', ''),
                record.get('record_type', 'full'),
                record_data.get('董事長', ''),
                record_data.get('總經理', ''),
                record_data.get('發言人', ''),
                record_data.get('代理發言人', ''),
                record_data.get('簽證會計師事務所', ''),
                record_data.get('簽證會計師1', ''),
                record_data.get('簽證會計師2', ''),
                json.dumps(record_data, ensure_ascii=False) if record_data else ''
            ))

    return True

def build_database():
    data_dir = get_data_dir()
    index_file = data_dir / "index.json"

    if not index_file.exists():
        print("錯誤：index.json 不存在，請先執行批次抓取")
        return

    with open(index_file, 'r', encoding='utf-8') as f:
        index = json.load(f)

    print(f"正在建立 SQLite 資料庫...")
    print(f"公司數量: {len(index)} 間\n")

    conn = create_database()
    cursor = conn.cursor()

    success = 0
    failed = 0

    for code in index.keys():
        if import_company(cursor, code, data_dir):
            success += 1
        else:
            failed += 1

        if success % 100 == 0:
            print(f"  已匯入 {success} 間...")
            conn.commit()

    conn.commit()

    # 統計資訊
    cursor.execute('SELECT COUNT(*) FROM companies')
    total_companies = cursor.fetchone()[0]

    cursor.execute('SELECT COUNT(*) FROM company_history')
    total_history = cursor.fetchone()[0]

    cursor.execute('SELECT COUNT(DISTINCT accounting_firm) FROM companies WHERE accounting_firm != ""')
    total_firms = cursor.fetchone()[0]

    cursor.execute('SELECT COUNT(DISTINCT industry) FROM companies WHERE industry != ""')
    total_industries = cursor.fetchone()[0]

    conn.close()

    db_path = data_dir / "companies.db"
    size_kb = db_path.stat().st_size / 1024
    size_mb = size_kb / 1024

    print(f"\n=== 建立完成 ===")
    print(f"公司數量: {total_companies} 間")
    print(f"歷史記錄: {total_history} 筆")
    print(f"會計師事務所: {total_firms} 間")
    print(f"產業類別: {total_industries} 種")
    print(f"檔案大小: {size_kb:.0f} KB ({size_mb:.2f} MB)")
    print(f"輸出: {db_path}")

def show_sample_queries():
    """顯示範例查詢"""
    print("""
=== 範例 SQL 查詢 ===

# 連接資料庫
sqlite3 data/companies.db

# 各會計師事務所簽證數量排名
SELECT accounting_firm, COUNT(*) as count
FROM companies
WHERE accounting_firm != ''
GROUP BY accounting_firm
ORDER BY count DESC
LIMIT 10;

# 各產業公司數量
SELECT industry, COUNT(*) as count
FROM companies
WHERE industry != ''
GROUP BY industry
ORDER BY count DESC;

# 查詢特定會計師簽證的公司
SELECT code, name, industry
FROM companies
WHERE accountant1 LIKE '%張%' OR accountant2 LIKE '%張%';

# 最近有變更的公司（有多筆歷史記錄）
SELECT code, name, COUNT(*) as records
FROM companies c
JOIN company_history h ON c.code = h.code
GROUP BY c.code
HAVING records > 1
ORDER BY records DESC;

# 勤業眾信簽證的上市櫃公司佔比
SELECT
    accounting_firm,
    COUNT(*) as count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM companies WHERE accounting_firm != ''), 2) as percentage
FROM companies
WHERE accounting_firm != ''
GROUP BY accounting_firm
ORDER BY count DESC;
""")

def main():
    import argparse

    parser = argparse.ArgumentParser(description='建立 SQLite 資料庫')
    parser.add_argument('--examples', '-e', action='store_true', help='顯示範例查詢')

    args = parser.parse_args()

    if args.examples:
        show_sample_queries()
    else:
        build_database()

if __name__ == '__main__':
    main()
