"""591 建案圖片下載器 — 從建案頁面抓取所有相簿圖片"""
import sys
sys.stdout.reconfigure(encoding='utf-8')

import json
import logging
import os
import re
import time
from datetime import datetime
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup

import config
from collector import jittered_delay, NewhouseCollector
from database import get_connection, init_db

log = logging.getLogger(__name__)

os.makedirs(config.LOG_DIR, exist_ok=True)
log_file = os.path.join(config.LOG_DIR, f"images_{datetime.now():%Y%m}.log")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(log_file, encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)

# 591 CDN image size suffixes
IMAGE_SIZES = {
    "thumb": "!660x495.water3.jpg",   # thumbnail (watermarked)
    "medium": "!1000x750.jpg",         # medium
    "large": "!1200x900.jpg",          # large
    "hd": "!1600x1200.jpg",            # high resolution (recommended)
    "full": "!2000x1500.jpg",          # largest available
}
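# Illustrative example (hypothetical path): a base URL such as
#   https://img1.591.com.tw/house/2024/photo.jpg
# combined with IMAGE_SIZES["hd"] would be requested as
#   https://img1.591.com.tw/house/2024/photo.jpg!1600x1200.jpg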

# Image category code to Chinese display name mapping
CATEGORY_NAMES = {
    "logo": "封面",         # cover
    "layout": "格局圖",     # unit layout
    "plan": "平面圖",       # floor plan
    "traffic": "交通圖",    # traffic / location map
    "3d": "3D透視",         # 3D perspective rendering
    "realistic": "實景",    # on-site photos
    "circum": "周邊環境",   # surrounding area
    "ad": "廣告",           # advertisement
    "public": "公設",       # shared facilities
    "sample": "實品屋",     # show unit
    "vr": "VR",
}
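# Note: the Chinese display names above are written verbatim into each manifest
# entry's "category_name" field; the category codes themselves stay in English.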


class ImageDownloader:
    def __init__(self, size: str = "hd"):
        self.collector = NewhouseCollector()
        self.size_suffix = IMAGE_SIZES.get(size, IMAGE_SIZES["hd"])
        self.size_name = size

    def fetch_page(self, housing_id: int) -> str | None:
        """取得建案頁面 HTML"""
        url = f"{config.BASE_URL}/{housing_id}"
        for attempt in range(3):
            try:
                jittered_delay()
                resp = self.collector.session.get(url, timeout=30)
                if resp.status_code in (403, 429, 503):
                    log.warning(f"限流 (HTTP {resp.status_code})，等待...")
                    time.sleep(30 * (attempt + 1))
                    if attempt == 0:
                        self.collector._rebuild_session()
                    continue
                resp.raise_for_status()
                return resp.text
            except requests.exceptions.ConnectionError as e:
                log.error(f"連線錯誤: {e}")
                self.collector._rebuild_session()
                time.sleep(10)
            except Exception as e:
                log.error(f"錯誤 (housing_id={housing_id}): {e}")
                if attempt < 2:
                    time.sleep(5)
        return None

    def parse_images(self, html: str, housing_id: int) -> list[dict]:
        """從頁面 HTML 解析所有相簿圖片

        591 頁面結構：
        - <div class="intro-photo-preview"> 包含所有相簿圖片
        - 每張圖片的 <a> 父元素有 href="/134497/album/view?cate=layout&index=0"
        - <img> 的 data-src 或 src 包含 CDN URL
        """
        soup = BeautifulSoup(html, "html.parser")
        images = []

        # Method 1: parse images from the intro-photo-preview container
        container = soup.find("div", class_="intro-photo-preview")
        if container:
            for img in container.find_all("img"):
                src = img.get("data-src") or img.get("src") or ""
                if not src or "591.com.tw" not in src:
                    continue

                # Determine the category from the parent <a>'s "cate" query parameter
                category = "other"
                parent_a = img.find_parent("a")
                if parent_a and parent_a.get("href"):
                    href = parent_a["href"]
                    qs = parse_qs(urlparse(href).query)
                    if "cate" in qs:
                        category = qs["cate"][0]

                # Clean the URL: strip the size suffix to get the base URL
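                # e.g. ".../photo.jpg!660x495.water3.jpg" -> ".../photo.jpg" (illustrative path)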
                base_url = re.sub(r'![^/]+$', '', src)

                images.append({
                    "base_url": base_url,
                    "category": category,
                    "alt": img.get("alt", ""),
                })

        # Method 2 (fallback): scan every <img> tag for 591 CDN house images
        if not images:
            for img in soup.find_all("img"):
                src = img.get("data-src") or img.get("src") or ""
                if "img1.591.com.tw/house" in src or "img2.591.com.tw/house" in src:
                    base_url = re.sub(r'![^/]+$', '', src)
                    images.append({
                        "base_url": base_url,
                        "category": "other",
                        "alt": img.get("alt", ""),
                    })

        # Deduplicate (keep each base_url only once)
        seen = set()
        unique = []
        for img in images:
            if img["base_url"] not in seen:
                seen.add(img["base_url"])
                unique.append(img)

        return unique

    def download_images(self, housing_id: int, force: bool = False) -> dict:
        """下載單一建案的所有圖片

        Returns:
            manifest dict with image metadata
        """
        out_dir = os.path.join("data", "images", str(housing_id))
        manifest_path = os.path.join(out_dir, "manifest.json")

        # Resume support: skip if a manifest already exists and force mode is off
        if not force and os.path.exists(manifest_path):
            log.info(f"[{housing_id}] 已有 manifest，跳過（使用 --force 重新下載）")
            with open(manifest_path, "r", encoding="utf-8") as f:
                return json.load(f)

        log.info(f"[{housing_id}] 開始下載圖片...")

        # 1. Fetch the page HTML
        html = self.fetch_page(housing_id)
        if not html:
            log.error(f"[{housing_id}] 無法取得頁面")
            return {"housing_id": housing_id, "images": [], "error": "fetch_failed"}

        # 2. Parse image entries
        parsed = self.parse_images(html, housing_id)
        if not parsed:
            log.warning(f"[{housing_id}] 未找到圖片")
            return {"housing_id": housing_id, "images": []}

        log.info(f"[{housing_id}] 找到 {len(parsed)} 張圖片")

        # 3. Count images per category and generate filenames
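        #    e.g. the second "layout" image is named layout_01.jpg (zero-based, two-digit index)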
        cat_counter = {}
        for img in parsed:
            cat = img["category"]
            idx = cat_counter.get(cat, 0)
            cat_counter[cat] = idx + 1
            img["index"] = idx
            img["filename"] = f"{cat}_{idx:02d}.jpg"

        # 4. Download
        os.makedirs(out_dir, exist_ok=True)
        success = 0
        fail = 0
        manifest_images = []

        for img in parsed:
            dl_url = img["base_url"] + self.size_suffix
            filepath = os.path.join(out_dir, img["filename"])

            # Skip files that already exist and are larger than 10 KB (assumed complete from a previous run)
            if not force and os.path.exists(filepath) and os.path.getsize(filepath) > 10240:
                manifest_images.append({
                    "filename": img["filename"],
                    "category": img["category"],
                    "category_name": CATEGORY_NAMES.get(img["category"], img["category"]),
                    "index": img["index"],
                    "size": os.path.getsize(filepath),
                })
                success += 1
                continue

            try:
                jittered_delay(base=0.5)
                resp = self.collector.session.get(dl_url, timeout=30)
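                # Responses of 5000 bytes or less are likely error pages or placeholders, not real photos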
                if resp.status_code == 200 and len(resp.content) > 5000:
                    with open(filepath, "wb") as f:
                        f.write(resp.content)
                    manifest_images.append({
                        "filename": img["filename"],
                        "category": img["category"],
                        "category_name": CATEGORY_NAMES.get(img["category"], img["category"]),
                        "index": img["index"],
                        "size": len(resp.content),
                    })
                    success += 1
                else:
                    log.warning(f"  {img['filename']}: HTTP {resp.status_code}, {len(resp.content)} bytes")
                    fail += 1
            except Exception as e:
                log.error(f"  {img['filename']}: {e}")
                fail += 1

        # 5. Write the manifest
        manifest = {
            "housing_id": housing_id,
            "downloaded_at": datetime.now().isoformat(),
            "size_preset": self.size_name,
            "total": len(parsed),
            "success": success,
            "fail": fail,
            "categories": {
                cat: {
                    "count": count,
                    "name": CATEGORY_NAMES.get(cat, cat),
                }
                for cat, count in sorted(cat_counter.items())
            },
            "images": manifest_images,
        }

        with open(manifest_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, ensure_ascii=False, indent=2)

        total_size = sum(im["size"] for im in manifest_images)
        log.info(
            f"[{housing_id}] 完成: {success}/{len(parsed)} 成功, "
            f"{fail} 失敗, {total_size/1024/1024:.1f} MB"
        )

        return manifest

    def download_batch(self, housing_ids: list[int], force: bool = False):
        """批次下載多個建案的圖片"""
        if not self.collector._get_csrf_token():
            log.warning("CSRF Token 取得失敗，嘗試繼續...")

        total = len(housing_ids)
        log.info(f"開始批次下載 {total} 個建案的圖片...")

        results = {"success": 0, "fail": 0, "skip": 0}

        for i, hid in enumerate(housing_ids, 1):
            log.info(f"[{i}/{total}] 建案 {hid}")
            manifest = self.download_images(hid, force=force)

            if manifest.get("error"):
                results["fail"] += 1
            elif not manifest.get("images"):
                results["skip"] += 1
            else:
                results["success"] += 1

            # Pause between projects
            if i < total:
                time.sleep(2)

        log.info(
            f"批次完成: 成功 {results['success']}, "
            f"失敗 {results['fail']}, 跳過 {results['skip']}"
        )
        return results


def get_all_project_ids() -> list[int]:
    """從資料庫取得所有建案 ID"""
    conn = get_connection()
    rows = conn.execute("SELECT id FROM projects ORDER BY id").fetchall()
    conn.close()
    return [r["id"] for r in rows]


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="591 housing project image downloader")
    parser.add_argument(
        "--id", type=int, nargs="+",
        help="指定建案 ID（可多個）"
    )
    parser.add_argument(
        "--all", action="store_true",
        help="下載所有建案的圖片"
    )
    parser.add_argument(
        "--size", choices=list(IMAGE_SIZES.keys()), default="hd",
        help="圖片尺寸 (預設: hd=1600x1200)"
    )
    parser.add_argument(
        "--force", action="store_true",
        help="強制重新下載（忽略已下載的檔案）"
    )
    parser.add_argument(
        "--limit", type=int, default=0,
        help="最多下載幾個建案（0=全部）"
    )
    args = parser.parse_args()
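    # Example invocations (script filename assumed, not confirmed by the source):
    #   python image_downloader.py --id 134497 --size hd
    #   python image_downloader.py --all --limit 10 --force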

    init_db()

    if not args.id and not args.all:
        parser.error("請指定 --id 或 --all")

    downloader = ImageDownloader(size=args.size)

    if args.id:
        ids = args.id
    else:
        ids = get_all_project_ids()
        if args.limit > 0:
            ids = ids[:args.limit]

    if len(ids) == 1:
        downloader.collector._get_csrf_token()
        downloader.download_images(ids[0], force=args.force)
    else:
        downloader.download_batch(ids, force=args.force)
