import argparse
import re
import sys
from pathlib import Path
from urllib.parse import urljoin
from urllib.request import Request, urlopen

import convert


DEFAULT_PAGE_URL = "https://www.rhb.ch/de/bahnfans/lokdienste/"
PDF_LINK_RE = re.compile(r'href="([^"]*Lokdienst_\d{2}\.\d{2}\.\d{4}\.pdf[^"]*)"', re.IGNORECASE)
ANCHOR_HINT_RE = re.compile(r"Täglicher\s+Lokdienst\s+öffnen", re.IGNORECASE)


def fetch_text(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    Undecodable bytes are replaced (errors="replace") so a stray
    mis-encoded page never aborts the pipeline.
    """
    headers = {"User-Agent": "Mozilla/5.0 (LokdienstAutoImporter)"}
    request = Request(url, headers=headers)
    with urlopen(request, timeout=30) as response:
        raw = response.read()
    return raw.decode("utf-8", errors="replace")


def fetch_bytes(url: str) -> bytes:
    """Download *url* and return the raw response body.

    Uses a longer timeout (60 s) than fetch_text since PDFs are larger
    than the HTML landing page.
    """
    headers = {"User-Agent": "Mozilla/5.0 (LokdienstAutoImporter)"}
    request = Request(url, headers=headers)
    with urlopen(request, timeout=60) as response:
        return response.read()


def find_latest_pdf_link(page_html: str, page_url: str) -> str:
    """Extract the absolute URL of the daily Lokdienst PDF from *page_html*.

    Strategy: locate the visible button text ("Täglicher Lokdienst öffnen")
    and look for a Lokdienst_DD.MM.YYYY.pdf href within ±1500 characters of
    it; if that fails, fall back to the first matching href anywhere on the
    page. Raises RuntimeError when no matching link exists at all.
    """
    pdf_href = re.compile(
        r'href="([^"]*Lokdienst_\d{2}\.\d{2}\.\d{4}\.pdf[^"]*)"', re.IGNORECASE
    )
    button_hint = re.compile(r"Täglicher\s+Lokdienst\s+öffnen", re.IGNORECASE)

    hint_match = button_hint.search(page_html)
    if hint_match is not None:
        # Search only a window around the button text so we prefer the
        # link the button actually points at.
        center = hint_match.start()
        lo = max(0, center - 1500)
        hi = min(len(page_html), center + 1500)
        near = pdf_href.search(page_html[lo:hi])
        if near is not None:
            return urljoin(page_url, near.group(1))

    anywhere = pdf_href.search(page_html)
    if anywhere is None:
        raise RuntimeError("Nem találtam Lokdienst PDF linket az oldalon.")
    return urljoin(page_url, anywhere.group(1))


def main() -> None:
    """Download the latest RhB Lokdienst PDF and import it into MySQL.

    Steps: fetch the landing page, resolve the current PDF link, download
    the file into --download-dir (skipped when it already exists, unless
    --force-redownload), then hand the path to convert.convert_pdf.

    Raises:
        RuntimeError: when --db is passed (SQLite support was removed),
            when no PDF link is found, or when the resolved link is not
            a .pdf filename.
    """
    parser = argparse.ArgumentParser(description="Auto download + import latest RhB Lokdienst PDF")
    parser.add_argument("--page-url", default=DEFAULT_PAGE_URL, help="RHB page URL that contains the daily Lokdienst button")
    parser.add_argument("--download-dir", default="data/pdf_archive", help="Directory where PDFs are stored")
    parser.add_argument("--mysql-config", default="php_web/config.php", help="MySQL config file path")
    parser.add_argument("--db", default=None, help="Deprecated. SQLite is removed; use --mysql-config.")
    parser.add_argument("--debug", action="store_true", help="Enable parser debug output")
    parser.add_argument("--force-redownload", action="store_true", help="Redownload PDF even if file exists locally")
    args = parser.parse_args()
    if args.db:
        raise RuntimeError("`--db` már nem támogatott. SQLite eltávolítva, használd: --mysql-config php_web/config.php")

    download_dir = Path(args.download_dir)
    download_dir.mkdir(parents=True, exist_ok=True)

    print(f"[AUTO] page={args.page_url}")
    html = fetch_text(args.page_url)
    pdf_url = find_latest_pdf_link(html, args.page_url)
    # Strip the query string before deriving the local filename.
    filename = Path(pdf_url.split("?")[0]).name
    if not filename.lower().endswith(".pdf"):
        # BUG FIX: the f-string previously had no placeholder, so the error
        # message never showed the offending filename.
        raise RuntimeError(f"Nem PDF fájlnév: {filename}")

    target_path = download_dir / filename
    print(f"[AUTO] found_pdf={pdf_url}")
    print(f"[AUTO] target={target_path}")

    if not target_path.exists() or args.force_redownload:
        pdf_bytes = fetch_bytes(pdf_url)
        target_path.write_bytes(pdf_bytes)
        print(f"[AUTO] downloaded bytes={len(pdf_bytes)}")
    else:
        print("[AUTO] already exists, skip download")

    convert.convert_pdf(
        str(target_path),
        args.mysql_config,
        debug=args.debug,
        append=True,
    )
    print("[AUTO] mysql import done")

    print("[AUTO] import pipeline done")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"[AUTO][ERROR] {e}", file=sys.stderr)
        raise
