#!/usr/bin/env python3
"""
把 Markdown 渲染成“正常公众号文章结构”预览 HTML。
核心目标：保住文章结构，不把连续论述切成短句墙。
"""
from __future__ import annotations

import argparse
import json
from pathlib import Path
import markdown
from bs4 import BeautifulSoup, Tag, NavigableString

# Stylesheet text for the rendered preview. build_html() assigns it to the
# <style> element inside the top-level <section class="article"> container.
# The selectors cover the classes this script emits: .article (container),
# .chapter-title (wrapper around each h2), .img-wrap (figure around images)
# and .sep (horizontal rules).
STYLE = """
.article {
  max-width: 760px;
  margin: 0 auto;
  padding: 24px 22px 60px;
  color: #222;
  background: #fff;
  font-family: -apple-system,BlinkMacSystemFont,"PingFang SC","Hiragino Sans GB","Microsoft YaHei",sans-serif;
  font-size: 13px;
  line-height: 1.92;
}
.chapter-title { margin: 52px 0 26px; }
.chapter-title h2 {
  margin: 0;
  text-align: center;
  font-family: "Songti SC","STSong","Noto Serif CJK SC",serif;
  font-size: 1.46em;
  line-height: 1.7;
  font-weight: 700;
  color: #1f1f1f;
}
h3 {
  margin: 32px 0 12px;
  font-size: 1.06em;
  line-height: 1.8;
  font-weight: 700;
  color: #303030;
}
p { margin: 0 0 18px; text-align: left; }
blockquote {
  margin: 22px 0;
  padding: 0 0 0 16px;
  border-left: 3px solid #b8aa96;
  color: #343434;
}
blockquote p { margin: 0 0 10px; }
blockquote p:last-child { margin-bottom: 0; }
ul, ol { margin: 10px 0 20px 1.35em; padding: 0; }
li { margin: 8px 0; }
.sep { border: none; border-top: 1px solid #ece7df; margin: 30px 0; }
.img-wrap { margin: 26px 0; }
.img-wrap img { display: block; max-width: 100%; margin: 0 auto; }
code {
  font-family: ui-monospace,SFMono-Regular,Menlo,monospace;
  font-size: 0.92em;
  background: #f5f5f5;
  padding: 0.12em 0.35em;
  border-radius: 4px;
}
strong { font-weight: 700; }
"""


def parse_args() -> argparse.Namespace:
    """Parse the renderer's command-line options from ``sys.argv``.

    Returns:
        A namespace with ``markdown`` and ``output`` (both required) and
        ``manifest`` (optional; ``None`` when the flag is omitted).
    """
    cli = argparse.ArgumentParser(description='把 Markdown 渲染成正常公众号文章结构 HTML')
    # (flag, keyword arguments) pairs, registered in the order shown by --help.
    option_specs = (
        ('--markdown', {'required': True, 'help': '源 Markdown 路径'}),
        ('--manifest', {'help': '可选的素材 manifest；若提供，会把正文图片替换成 wechat_url'}),
        ('--output', {'required': True, 'help': '输出 HTML 路径'}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()


def load_manifest(manifest_path: Path) -> dict:
    """Read and parse the asset manifest JSON file.

    Args:
        manifest_path: Location of the manifest file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: the locale default encoding (e.g. GBK or
    # cp1252 on Windows) would fail on, or mis-decode, non-ASCII manifest text.
    return json.loads(manifest_path.read_text(encoding='utf-8'))


def get_img_url(img: Tag) -> str:
    """Return the URL carried by an image element, or '' when it has none.

    ``src`` takes precedence; ``data-src`` is consulted only when ``src`` is
    missing or falsy. The value is stringified and stripped of surrounding
    whitespace.
    """
    url = img.get('src')
    if not url:
        url = img.get('data-src')
    if not url:
        url = ''
    return str(url).strip()


def resolve_wechat_url(
    manifest: dict,
    markdown_path: Path,
    src: str,
    manifest_path: Path | None = None,
) -> str:
    """Map a local image reference from the Markdown to its uploaded URL.

    An asset whose resolved ``local_path`` equals the resolved image path is
    preferred. Only when no asset matches by full path does the first asset
    with the same file name win, so a same-named file in another directory
    can no longer shadow the exact match.

    Args:
        manifest: Parsed manifest; its ``assets`` entries are read for
            ``local_path`` and ``wechat_url``.
        markdown_path: Path of the Markdown file; relative ``src`` values are
            resolved against its parent directory.
        src: The image reference as written in the document.
        manifest_path: Path of the manifest file; relative ``local_path``
            values are resolved against its parent directory. When omitted,
            the module-level ``manifest_path_cache`` set by ``main()`` is used.

    Returns:
        The ``wechat_url`` of the matching asset.

    Raises:
        ValueError: If no manifest location is known, if no asset matches,
            or if the matching asset has no ``wechat_url``.
    """
    if manifest_path is None:
        # Backward compatibility: main() publishes the manifest location as a
        # module global. Look it up defensively so a direct call fails with a
        # clear error instead of a NameError.
        manifest_path = globals().get('manifest_path_cache')
    if manifest_path is None:
        raise ValueError('无法解析 manifest 资源路径：缺少 manifest 文件路径')
    manifest_dir = Path(manifest_path).parent

    src_path = Path(src)
    if src_path.is_absolute():
        original_abs = src_path.resolve()
    else:
        original_abs = (markdown_path.parent / src_path).resolve()

    exact_match = None
    name_match = None
    for asset in manifest.get('assets', []):
        # `or ''` keeps a JSON null from turning into the string 'None'.
        local_path = str(asset.get('local_path') or '').strip()
        if not local_path:
            continue
        asset_abs = (manifest_dir / local_path).resolve()
        if asset_abs == original_abs:
            exact_match = asset
            break
        if name_match is None and asset_abs.name == src_path.name:
            name_match = asset

    matched = exact_match if exact_match is not None else name_match
    if matched is None:
        raise ValueError(f'manifest 中找不到正文图片：{src}')

    wechat_url = str(matched.get('wechat_url') or '').strip()
    if not wechat_url:
        local_path = str(matched.get('local_path') or '').strip()
        raise ValueError(f'图片缺少 wechat_url：{local_path}')
    return wechat_url


def get_required_assets_by_role(manifest: dict, role: str) -> list[dict]:
    """Collect the manifest assets of one role that are marked as required.

    An asset qualifies when its ``role`` equals *role* and its ``required``
    field is exactly ``True``.

    Args:
        manifest: Parsed manifest; its ``assets`` list is scanned.
        role: Role name to select.

    Returns:
        The qualifying asset dicts, in manifest order.

    Raises:
        ValueError: If a qualifying asset has a missing, null, or blank
            ``wechat_url``.
    """
    selected = [
        asset
        for asset in manifest.get('assets', [])
        if asset.get('role') == role and asset.get('required') is True
    ]
    for asset in selected:
        # `or ''` matters: str(None) is the truthy string 'None', which used
        # to let a JSON null slip through as if it were a real URL.
        if not str(asset.get('wechat_url') or '').strip():
            raise ValueError(f'{role} 资产缺少 wechat_url：{asset.get("key")}')
    return selected


def is_image_only_paragraph(tag: Tag) -> bool:
    """Tell whether *tag* is a ``<p>`` whose only real content is one ``<img>``.

    Whitespace-only text nodes are ignored; any other text node or any other
    element disqualifies the paragraph.
    """
    if tag.name != 'p':
        return False
    # Keep every element, plus text nodes that contain something besides whitespace.
    significant = [
        child
        for child in tag.contents
        if not isinstance(child, NavigableString) or child.strip()
    ]
    if len(significant) != 1:
        return False
    only_child = significant[0]
    return isinstance(only_child, Tag) and only_child.name == 'img'


def wrap_image_paragraph(tag: Tag, soup: BeautifulSoup) -> Tag:
    """Build a ``<figure class="img-wrap">`` holding the first ``<img>`` of *tag*.

    The image is detached from *tag* and moved into the new figure. When *tag*
    contains no image, the figure is returned empty.
    """
    wrapper = soup.new_tag('figure', attrs={'class': 'img-wrap'})
    image = tag.find('img')
    if image is None:
        return wrapper
    wrapper.append(image.extract())
    return wrapper


def wrap_h2(tag: Tag, soup: BeautifulSoup) -> Tag:
    """Detach *tag* from its tree and return it wrapped in ``<section class="chapter-title">``."""
    wrapper = soup.new_tag('section', attrs={'class': 'chapter-title'})
    heading = tag.extract()
    wrapper.append(heading)
    return wrapper


def build_fixed_asset_figure(soup: BeautifulSoup, url: str, alt: str) -> Tag:
    """Create a new ``<figure class="img-wrap">`` containing one ``<img>``.

    The image gets *url* as ``src`` and *alt* as ``alt``, in that attribute order.
    """
    wrapper = soup.new_tag('figure', attrs={'class': 'img-wrap'})
    wrapper.append(soup.new_tag('img', attrs={'src': url, 'alt': alt}))
    return wrapper


def get_meaningful_children(article: Tag) -> list[Tag]:
    """Return the direct element children of *article*, excluding ``<style>``.

    Text nodes are skipped entirely; document order is preserved.
    """
    return [
        child
        for child in article.contents
        if isinstance(child, Tag) and child.name != 'style'
    ]


def get_asset_container(img: Tag) -> Tag:
    """Return the node that should be moved together with *img*.

    That is the parent when it is a ``<figure>`` or ``<p>`` element; otherwise
    the image itself.
    """
    holder = img.parent
    if not isinstance(holder, Tag):
        return img
    return holder if holder.name in ('figure', 'p') else img


def find_asset_container(article: Tag, url: str) -> Tag | None:
    """Find the first image in *article* whose URL equals *url*.

    Returns:
        The container of that image as chosen by ``get_asset_container``, or
        ``None`` when no image matches.
    """
    matching = next(
        (img for img in article.find_all('img') if get_img_url(img) == url),
        None,
    )
    return None if matching is None else get_asset_container(matching)


def insert_after_style(article: Tag, node: Tag) -> None:
    """Insert *node* right after the direct ``<style>`` child of *article*.

    When *article* has no direct ``<style>`` child, *node* becomes its first child.
    """
    anchor = article.find('style', recursive=False)
    if anchor is not None:
        anchor.insert_after(node)
    else:
        article.insert(0, node)


def ensure_fixed_edge_asset(article: Tag, soup: BeautifulSoup, url: str, alt: str, position: str) -> None:
    """Place the image identified by *url* at one edge of *article*.

    An existing container of that image is detached and reused; otherwise a
    fresh figure is built from *url* and *alt*. ``position == 'start'`` puts the
    node right after the style element; any other value appends it at the end.
    """
    current = find_asset_container(article, url)
    if current is None:
        node = build_fixed_asset_figure(soup, url, alt)
    else:
        node = current.extract()

    if position == 'start':
        insert_after_style(article, node)
    else:
        article.append(node)


def enforce_fixed_assets(article: Tag, soup: BeautifulSoup, manifest: dict) -> None:
    """Move or create the required head, tail and CTA images at the article edges.

    Required ``fixed_head`` assets end up first (in manifest order), while
    required ``fixed_tail`` assets followed by required ``cta`` assets end up
    last.

    Raises:
        ValueError: Propagated from ``get_required_assets_by_role`` when a
            required asset lacks a ``wechat_url``.
    """
    heads = get_required_assets_by_role(manifest, 'fixed_head')
    tails = get_required_assets_by_role(manifest, 'fixed_tail')
    ctas = get_required_assets_by_role(manifest, 'cta')

    # Each head is inserted at the very front, so walking the list backwards
    # leaves the heads in their manifest order.
    for head in reversed(heads):
        head_url = str(head['wechat_url']).strip()
        head_alt = str(head.get('alt') or head.get('key') or 'fixed_head').strip()
        ensure_fixed_edge_asset(article, soup, head_url, head_alt, 'start')

    for trailing in [*tails, *ctas]:
        trailing_url = str(trailing['wechat_url']).strip()
        trailing_alt = str(
            trailing.get('alt') or trailing.get('key') or trailing.get('role') or 'fixed_tail'
        ).strip()
        ensure_fixed_edge_asset(article, soup, trailing_url, trailing_alt, 'end')


def _first_img_url(node: Tag) -> str:
    """Return the URL of *node* itself if it is an ``<img>``, else of its first descendant image ('' if none)."""
    # find() only searches descendants, so a bare <img> placed directly in the
    # article has to be recognised explicitly.
    img = node if node.name == 'img' else node.find('img')
    return get_img_url(img) if isinstance(img, Tag) else ''


def validate_rendered_article(article: Tag, manifest: dict) -> None:
    """Check the rendered article against the structural rules for publishing.

    The checks run in this order:

    1. the article contains no ``<h1>``;
    2. at least one ``<h2>`` sits directly inside a ``.chapter-title`` wrapper;
    3. every ``<h2>`` has a ``chapter-title`` parent;
    4. every image URL starts with ``http://`` or ``https://``;
    5. the article has at least one non-style child element;
    6. the leading child elements carry the required ``fixed_head`` images, in
       manifest order (all of them, mirroring the tail check);
    7. the trailing child elements carry the required ``fixed_tail`` images
       followed by the required ``cta`` images, in manifest order.

    Args:
        article: The ``.article`` container element.
        manifest: Parsed manifest describing the required assets.

    Raises:
        ValueError: On the first rule that is violated, or when a required
            asset lacks a ``wechat_url``.
    """
    h1_count = len(article.find_all('h1'))
    if h1_count != 0:
        raise ValueError(f'微信正文不应再包含 H1，当前为 {h1_count}')

    if not article.select('.chapter-title > h2'):
        raise ValueError('正文缺少章节标题，至少需要一个二级标题章节块')

    stray_h2 = []
    for heading in article.find_all('h2'):
        parent = heading.parent
        parent_classes = (parent.get('class') if isinstance(parent, Tag) else None) or []
        if 'chapter-title' not in parent_classes:
            stray_h2.append(heading.get_text(strip=True))
    if stray_h2:
        raise ValueError(f'存在未包裹为 chapter-title 的 H2：{stray_h2[:3]}')

    unresolved_imgs = [
        url
        for url in (get_img_url(img) for img in article.find_all('img'))
        if not url.startswith(('http://', 'https://'))
    ]
    if unresolved_imgs:
        raise ValueError(f'仍存在未替换为微信 URL 的图片：{unresolved_imgs}')

    meaningful = get_meaningful_children(article)
    if not meaningful:
        raise ValueError('正文为空')

    fixed_heads = get_required_assets_by_role(manifest, 'fixed_head')
    if fixed_heads:
        # Compare every required head, not just the first one.
        expected_heads = [str(asset['wechat_url']).strip() for asset in fixed_heads]
        actual_heads = [_first_img_url(node) for node in meaningful[:len(fixed_heads)]]
        if actual_heads != expected_heads:
            raise ValueError('固定头图未位于正文最前')

    end_assets = get_required_assets_by_role(manifest, 'fixed_tail') + get_required_assets_by_role(manifest, 'cta')
    if end_assets:
        expected = [str(asset['wechat_url']).strip() for asset in end_assets]
        actual = [_first_img_url(node) for node in meaningful[-len(end_assets):]]
        if actual != expected:
            raise ValueError(f'固定尾图/CTA 顺序不对，expected={expected}, actual={actual}')


def convert_markdown(md_text: str) -> str:
    """Convert Markdown text to an HTML fragment with the 'extra' and 'sane_lists' extensions enabled."""
    enabled_extensions = ['extra', 'sane_lists']
    return markdown.markdown(md_text, extensions=enabled_extensions)


def build_html(inner_html: str) -> str:
    """Assemble the article container from a converted HTML fragment.

    The top-level nodes of *inner_html* are copied, in order, into a
    ``<section class="article">`` that starts with a ``<style>`` element
    holding ``STYLE``, with these adjustments:

    * whitespace-only text nodes and ``<h1>`` elements are dropped;
    * image-only paragraphs become ``<figure class="img-wrap">``;
    * ``<h2>`` elements are wrapped in ``<section class="chapter-title">``;
    * ``<hr>`` elements gain the ``sep`` class.

    Returns:
        The serialized article section.
    """
    shell = BeautifulSoup('<section class="article"><style></style></section>', 'html.parser')
    article = shell.section
    article.style.string = STYLE

    parsed = BeautifulSoup(inner_html, 'html.parser')
    # Iterate over a snapshot: appending a node to the article detaches it
    # from the parsed fragment.
    for node in tuple(parsed.contents):
        if isinstance(node, NavigableString):
            if node.strip():
                article.append(node)
            continue
        # An <h1> can never be an image-only paragraph, so it is safe to
        # discard it before that check.
        if not isinstance(node, Tag) or node.name == 'h1':
            continue

        if is_image_only_paragraph(node):
            replacement = wrap_image_paragraph(node, shell)
        elif node.name == 'h2':
            replacement = wrap_h2(node, shell)
        else:
            if node.name == 'hr':
                node['class'] = node.get('class', []) + ['sep']
            replacement = node
        article.append(replacement)

    return str(article)


def main() -> None:
    """Render the Markdown file given on the command line to an HTML file.

    Without ``--manifest`` the converted article is written as-is. With a
    manifest, every non-HTTP image ``src`` is replaced by its ``wechat_url``,
    the required head/tail/CTA images are enforced, and the result is
    validated before it is written. The output path is printed at the end.

    Raises:
        ValueError: If an image cannot be resolved, the ``.article`` container
            is missing, or validation fails.
    """
    args = parse_args()
    md_path = Path(args.markdown).resolve()
    out_path = Path(args.output).resolve()
    manifest = None
    # resolve_wechat_url() reads the manifest location from this module global.
    global manifest_path_cache
    manifest_path_cache = None
    if args.manifest:
        manifest_path_cache = Path(args.manifest).resolve()
        manifest = load_manifest(manifest_path_cache)
    # Read and write explicitly as UTF-8 so non-ASCII article text does not
    # depend on the platform's locale encoding.
    md_text = md_path.read_text(encoding='utf-8')
    html = build_html(convert_markdown(md_text))
    if manifest is not None:
        soup = BeautifulSoup(html, 'html.parser')
        for img in soup.find_all('img'):
            src = str(img.get('src', '')).strip()
            # Empty and already-remote sources need no manifest lookup.
            if not src or src.startswith(('http://', 'https://')):
                continue
            img['src'] = resolve_wechat_url(manifest, md_path, src)
        article = soup.select_one('.article')
        if article is None:
            raise ValueError('渲染结果缺少 .article 容器')
        enforce_fixed_assets(article, soup, manifest)
        validate_rendered_article(article, manifest)
        html = str(soup)
    out_path.write_text(html, encoding='utf-8')
    print(out_path)


if __name__ == '__main__':
    main()
