# SKILLS/markdown-converter/scripts/md_convert.py

"""Markdown 转 PNG 图片工具（使用 markdown 库 + BeautifulSoup + Pillow）

Dependencies:
  - pip install Pillow markdown beautifulsoup4

特点：
  - 使用 markdown 库解析，支持完整 GFM
  - 使用 BeautifulSoup 解析 HTML
  - 纯 Pillow 渲染，无需浏览器
"""

import sys
import os
import re
from pathlib import Path

try:
    from PIL import Image, ImageDraw, ImageFont
except ImportError:
    print("Error: Pillow not installed. Run: pip install Pillow", file=sys.stderr)
    sys.exit(1)

try:
    import markdown
except ImportError:
    print("Error: markdown not installed. Run: pip install markdown", file=sys.stderr)
    sys.exit(1)

try:
    from bs4 import BeautifulSoup
except ImportError:
    print("Error: beautifulsoup4 not installed. Run: pip install beautifulsoup4", file=sys.stderr)
    sys.exit(1)


# ============================================================
# Font lookup
# ============================================================

def _find_font():
    """Find a suitable TrueType font across platforms."""
    candidates = []
    if sys.platform == "win32":
        pf = os.environ.get("WINDIR", r"C:\Windows")
        candidates = [
            os.path.join(pf, "Fonts", "msyh.ttc"),
            os.path.join(pf, "Fonts", "msyhbd.ttc"),
            os.path.join(pf, "Fonts", "simhei.ttf"),
            os.path.join(pf, "Fonts", "simsun.ttc"),
        ]
    elif sys.platform == "darwin":
        candidates = [
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Light.ttc",
            "/Library/Fonts/Arial Unicode.ttf",
        ]
    else:
        candidates = [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansSC-Regular.otf",
        ]
    for p in candidates:
        if os.path.exists(p):
            return p
    return None


# ============================================================
# Markdown to PNG
# ============================================================

def _load_fonts(font_path):
    """Load the fonts used by ``markdown_to_png``, keyed by role.

    Args:
        font_path: Path of a font file to load with ``ImageFont.truetype``,
            or ``None`` when no font file was found.

    Returns:
        A dict with the keys ``'h1'``, ``'h2'``, ``'h3'``, ``'body'``,
        ``'code'`` and ``'small'``. When ``font_path`` is ``None`` or the
        file cannot be loaded, every role maps to ``ImageFont.load_default()``
        and a warning is written to stderr.
    """
    sizes = {'h1': 24, 'h2': 20, 'h3': 18, 'body': 15, 'code': 13, 'small': 12}
    if font_path:
        try:
            return {role: ImageFont.truetype(font_path, size) for role, size in sizes.items()}
        except Exception as exc:  # best-effort: any load failure falls back to the default font
            print(
                f"Warning: failed to load font {font_path} ({exc}); using Pillow default font",
                file=sys.stderr,
            )
    else:
        print("Warning: no TrueType font found; using Pillow default font", file=sys.stderr)
    return dict.fromkeys(sizes, ImageFont.load_default())


def markdown_to_png(md_text, img_path):
    """Render Markdown text to a PNG image using Pillow only.

    The Markdown is converted to HTML with the ``markdown`` library
    (``tables``, ``fenced_code``, ``codehilite`` and ``nl2br`` extensions),
    parsed with BeautifulSoup, and each top-level element is laid out on an
    800 px wide canvas below a 60 px gradient header.

    Layout is computed exactly once: every element appends its draw calls
    (with final coordinates) to a list, the canvas is then created with the
    height the layout actually needs, and the recorded calls are replayed.
    Because measuring and drawing share one code path, the canvas height can
    never disagree with the rendered content.

    Supported top-level tags: ``h1``-``h6`` (``h4``-``h6`` use the ``h3``
    style), ``p``, ``pre`` / ``div.codehilite``, ``blockquote``, ``ul``,
    ``ol``, ``hr``, ``table``, ``br`` and the inline tags ``strong``, ``b``,
    ``em``, ``i``, ``code``. Other tags are ignored.

    Args:
        md_text: Markdown source text.
        img_path: Destination path of the PNG; missing parent directories
            are created.
    """
    fonts = _load_fonts(_find_font())

    W = 800
    PAD = 40
    CONTENT_W = W - PAD * 2
    content_top = 60  # height of the gradient header strip
    line_height = 26

    # tag -> (font, vertical step per line, gap after the heading)
    heading_styles = {
        'h1': (fonts['h1'], 40, 15),
        'h2': (fonts['h2'], 36, 12),
        'h3': (fonts['h3'], 32, 10),
    }
    for minor in ('h4', 'h5', 'h6'):
        # No dedicated style exists for these levels; reuse h3 rather than
        # dropping the heading text.
        heading_styles[minor] = heading_styles['h3']

    # Inline tags that appear directly at the top level: tag -> (font, colour)
    inline_styles = {
        'strong': (fonts['body'], '#1a1a2e'),
        'b': (fonts['body'], '#1a1a2e'),
        'em': (fonts['body'], '#666666'),
        'i': (fonts['body'], '#666666'),
        'code': (fonts['code'], '#333333'),
    }

    # Convert to HTML with the markdown library, then parse with BeautifulSoup.
    md = markdown.Markdown(extensions=['tables', 'fenced_code', 'codehilite', 'nl2br'])
    html = md.convert(md_text)
    soup = BeautifulSoup(html, 'html.parser')
    root = soup.body if soup.body else soup

    # Scratch drawing context used only for measuring text.
    measure_draw = ImageDraw.Draw(Image.new('RGB', (1, 1)))

    def measure_text(text, font):
        bbox = measure_draw.textbbox((0, 0), text, font=font)
        return bbox[2] - bbox[0]

    def wrap_text(text, font, max_width):
        """Split text on newlines, then break each line greedily per character."""
        lines = []
        for paragraph in text.split('\n'):
            if not paragraph.strip():
                lines.append('')
                continue
            if measure_text(paragraph, font) <= max_width:
                # Common case: the whole line fits, skip per-character probing.
                lines.append(paragraph)
                continue
            current = ''
            for ch in paragraph:
                candidate = current + ch
                if current and measure_text(candidate, font) > max_width:
                    lines.append(current)
                    current = ch
                else:
                    current = candidate
            if current:
                lines.append(current)
        return lines

    # Recorded draw calls: (ImageDraw method name, positional args, keyword args)
    ops = []

    def emit(method, *args, **kwargs):
        ops.append((method, args, kwargs))

    x = PAD
    cy = content_top + PAD

    def put_lines(lines, font, fill, step, indent=0):
        """Record one text call per line and advance the cursor by step each."""
        nonlocal cy
        for line in lines:
            if line:
                emit('text', (x + indent, cy), line, fill=fill, font=font)
            cy += step

    def layout(elem):
        nonlocal cy
        tag = elem.name

        if tag in heading_styles:
            font, step, gap = heading_styles[tag]
            put_lines(wrap_text(elem.get_text(), font, CONTENT_W), font, '#1a1a2e', step)
            cy += gap

        elif tag == 'p':
            lines = wrap_text(elem.get_text(), fonts['body'], CONTENT_W - 20)
            put_lines(lines, fonts['body'], '#374151', line_height)
            cy += 8

        elif tag in inline_styles:
            # Inline element sitting directly at the top level: render it as
            # its own wrapped line(s) without touching the left margin.
            font, fill = inline_styles[tag]
            put_lines(wrap_text(elem.get_text(), font, CONTENT_W - 20), font, fill, line_height)

        elif tag == 'pre' or (tag == 'div' and 'codehilite' in elem.get('class', [])):
            # Code block. Trailing newlines would only add blank rows to the
            # box; long lines are wrapped to the measured box width instead of
            # being cut at a guessed character count.
            code_text = elem.get_text().rstrip('\n')
            lines = wrap_text(code_text, fonts['code'], CONTENT_W - 24)
            code_h = len(lines) * 20 + 20
            emit(
                'rounded_rectangle',
                [x, cy, x + CONTENT_W, cy + code_h],
                radius=8,
                fill='#f4f4f4',
                outline='#e0e0e0',
                width=1,
            )
            for i, line in enumerate(lines):
                if line:
                    emit('text', (x + 12, cy + 10 + i * 20), line, fill='#333333', font=fonts['code'])
            cy += code_h + 15

        elif tag == 'blockquote':
            # Strip the newlines surrounding the inner <p> so they do not
            # show up as blank lines inside the quote bar.
            lines = wrap_text(elem.get_text().strip(), fonts['body'], CONTENT_W - 30)
            emit('rectangle', [x, cy, x + 3, cy + len(lines) * line_height], fill='#0066cc')
            put_lines(lines, fonts['body'], '#666666', line_height, indent=15)
            cy += 10

        elif tag in ('ul', 'ol'):
            text_indent = 16 if tag == 'ul' else 20
            for number, li in enumerate(elem.find_all('li', recursive=False), 1):
                marker = '•' if tag == 'ul' else f'{number}.'
                emit('text', (x, cy), marker, fill='#0066cc', font=fonts['body'])
                lines = wrap_text(li.get_text().strip(), fonts['body'], CONTENT_W - 20)
                put_lines(lines, fonts['body'], '#374151', line_height, indent=text_indent)
            cy += 8

        elif tag == 'hr':
            emit('line', [(x, cy), (x + CONTENT_W, cy)], fill='#e0e0e0', width=1)
            cy += 30

        elif tag == 'table':
            for row in elem.find_all('tr'):
                cells = row.find_all(['td', 'th'])
                if not cells:
                    continue
                cell_w = CONTENT_W // len(cells)
                cell_x = x
                for cell in cells:
                    cell_text = cell.get_text().strip()[:15]
                    emit('rectangle', [cell_x, cy, cell_x + cell_w, cy + 32], outline='#ddd')
                    emit('text', (cell_x + 8, cy + 6), cell_text, fill='#333', font=fonts['small'])
                    cell_x += cell_w
                cy += 36

        elif tag == 'br':
            cy += line_height

    for elem in root.children:
        if getattr(elem, 'name', None):
            layout(elem)

    # Canvas height comes from the layout itself, plus bottom padding.
    total_h = max(400, cy + PAD)

    # White content area; only the header strip is painted over it.
    img = Image.new('RGB', (W, total_h), '#ffffff')
    draw = ImageDraw.Draw(img)

    # Header gradient
    for row in range(content_top):
        ratio = row / content_top
        r = int(42 + (61 - 42) * ratio)
        g = int(98 + (133 - 98) * ratio)
        b = int(239 + (255 - 239) * ratio)
        draw.line([(0, row), (W, row)], fill=(r, g, b))

    # Replay the recorded draw calls on the real canvas.
    for method, args, kwargs in ops:
        getattr(draw, method)(*args, **kwargs)

    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
    img.save(img_path, 'PNG')
    print(f'Converted: Markdown -> {img_path}')


def main():
    """Command-line entry point: ``md_convert.py <input.md> <output.png>``.

    Reads the input file as UTF-8 and passes its text and the output path to
    ``markdown_to_png``.

    Exits with status 1, after printing a message to stderr, when fewer than
    two arguments are supplied or when the input path is not an existing
    regular file.
    """
    if len(sys.argv) < 3:
        print("Usage: python md_convert.py <input.md> <output.png>", file=sys.stderr)
        sys.exit(1)

    input_path = sys.argv[1]
    output_path = sys.argv[2]

    source = Path(input_path)
    # is_file() rather than exists(): a directory "exists" but reading it as
    # text would fail with an uncaught exception instead of a clean error.
    if not source.is_file():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    md_text = source.read_text(encoding="utf-8")
    markdown_to_png(md_text, output_path)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()