import sys
# Switch stdout to UTF-8 before anything is printed. The messages this script
# prints contain Chinese text; presumably the default console encoding on
# some machines cannot represent it -- TODO confirm the target environment.
sys.stdout.reconfigure(encoding='utf-8')

import json
import csv
import re
from pathlib import Path

# Load the JSON file located at ../PBC relative to this script's directory.
json_path = Path(__file__).parent.parent / "PBC" / "Taiwan properties_cleaned.json"
with json_path.open('r', encoding='utf-8') as f:
    data = json.loads(f.read())

# Keep only the records whose '類型' (type) field equals '土地' (land).
lands = list(filter(lambda record: record.get('類型') == '土地', data))
print(f"共找到 {len(lands)} 筆土地")

# Land-number parsing. Example input: 臺北市內湖區潭美段一小段64號
# The patterns are compiled once at import time rather than looked up on
# every call.

# Leading county/city: a few explicit city names, otherwise the shortest
# prefix that ends in 市 or 縣.
_COUNTY_RE = re.compile(r'^(臺北市|新北市|桃園市|台中市|台南市|高雄市|[^市縣]+[市縣])')

# Remainder after the county: everything up to the last 段 (optionally
# followed by a sub-section suffix), then the parcel number made of digits
# and hyphens, e.g. 內湖區潭美段一小段64 or 大同區大龍段二小段3.
_SECTION_RE = re.compile(r'^(.+段[一二三四五六七八九十]?小?段?)(\d+[-\d]*)$')


def parse_land_number(full_address):
    """Split a full land-number string into county, section and parcel number.

    Args:
        full_address: The land number as one string, for example
            '臺北市內湖區潭美段一小段64號'. Surrounding whitespace and a
            trailing '號' are ignored.

    Returns:
        A tuple ``(county, section, land_number)`` of strings. ``section``
        is everything between the county and the parcel number, so it
        includes the district name when one is present. If the input is not
        a string or does not match the expected format, the tuple
        ``(None, None, None)`` is returned instead.

    Example:
        >>> parse_land_number('臺北市內湖區潭美段一小段64號')
        ('臺北市', '內湖區潭美段一小段', '64')
    """
    # A missing value (e.g. JSON null -> None) cannot be parsed; report it as
    # a parse failure instead of raising AttributeError on .rstrip().
    if not isinstance(full_address, str):
        return None, None, None

    # Strip whitespace first: otherwise a trailing space or newline would
    # hide the '號' suffix from rstrip() and make the address unparseable.
    addr = full_address.strip().rstrip('號')

    county_match = _COUNTY_RE.match(addr)
    if not county_match:
        return None, None, None

    county = county_match.group(1)
    rest = addr[len(county):]

    section_match = _SECTION_RE.match(rest)
    if not section_match:
        return None, None, None

    return county, section_match.group(1), section_match.group(2)

# Write the parsed land numbers to ../data/land_numbers_input.csv relative to
# this script's directory, creating the output directory if needed.
output_path = Path(__file__).parent.parent / "data" / "land_numbers_input.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)

with output_path.open('w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['county', 'section', 'land_number', 'original', 'area', 'share', 'land_value'])

    for land in lands:
        full_addr = land.get('地號', '')
        county, section, land_num = parse_land_number(full_addr)

        # Records that cannot be parsed are reported on stdout and left out
        # of the CSV.
        if not (county and section and land_num):
            print(f"無法解析: {full_addr}")
            continue

        # Area, ownership share and announced value are copied through
        # unchanged; a missing key becomes an empty cell.
        extras = [land.get(key, '') for key in ('面積_平方公尺', '持分', '公告現值')]
        writer.writerow([county, section, land_num, full_addr, *extras])

print(f"已輸出至: {output_path}")
