feat:修改实现方式

2026-04-18 22:00:23 +08:00 · 2026-04-18 22:00:23 +08:00 · 55c175d7a7
parent 6fc0be476f
commit 55c175d7a7
2 changed files with 359 additions and 22 deletions
--- a/markdown-converter/SKILL.md
+++ b/markdown-converter/SKILL.md
@ -1,30 +1,31 @@
 ---
 name: markdown-converter
-description: A simple tool to convert Markdown to PNG images using browser engine.
+description: A simple tool to convert Markdown to PNG images using markdown library + BeautifulSoup + Pillow.
 metadata: {"clawdbot":{"emoji":"🖼️","os":["linux","darwin","win32"]}}
 ---

 # Markdown to PNG Converter

-A simple tool to convert Markdown documents to PNG images using Chromium browser engine.
+A simple tool to convert Markdown documents to PNG images using markdown library, BeautifulSoup, and Pillow.

 ## Features

- **Browser-based rendering**: High quality output using real browser engine
- **Full CSS support**: Supports complex Markdown with tables, code blocks, etc.
- **Code highlighting**: Syntax highlighting for code blocks
+- **Extended Markdown support**: Tables, fenced code blocks, code highlighting markup, and newline-to-line-break handling via the markdown library's built-in extensions
+- **BeautifulSoup parsing**: Robust HTML parsing
+- **Pure Pillow rendering**: No browser required
 - **CJK support**: Uses system fonts for Chinese character rendering

 ## Installation

 ```bash
-pip install markdown2image playwright && playwright install chromium
+pip install Pillow markdown beautifulsoup4
 ```

 ## Dependencies

- **markdown2image**: Markdown to image conversion
- **playwright**: Browser automation (Chromium)
+- **Pillow**: Image processing and drawing
+- **markdown**: Python Markdown parser with GFM extensions
+- **beautifulsoup4**: HTML parsing

 ## Usage

@ -35,11 +36,12 @@ python scripts/md_convert.py input.md output.png

 ## Supported Platforms

- **Windows**: Uses Chromium browser
- **macOS**: Uses Chromium browser
- **Linux**: Uses Chromium browser
+- **Windows**: Uses system CJK fonts
+- **macOS**: Uses system CJK fonts
+- **Linux**: Uses system CJK fonts

 ## Notes

- Requires Chromium browser (installed via `playwright install chromium`)
+- Uses markdown library for robust Markdown parsing
+- BeautifulSoup handles complex HTML structures
 - Chinese fonts are supported via system fonts
--- a/markdown-converter/scripts/md_convert.py
+++ b/markdown-converter/scripts/md_convert.py
@ -1,22 +1,359 @@
-"""Markdown to PNG converter
+"""Markdown 转 PNG 图片工具（使用 markdown 库 + BeautifulSoup + Pillow）

 Dependencies:
-  - pip install markdown2image playwright && playwright install chromium
+  - pip install Pillow markdown beautifulsoup4
+
+特点：
+  - 使用 markdown 库解析，支持完整 GFM
+  - 使用 BeautifulSoup 解析 HTML
+  - 纯 Pillow 渲染，无需浏览器
 """

-import os
 import sys
+import os
+import re
 from pathlib import Path

-from markdown2image import Markdown2Image
+try:
+    from PIL import Image, ImageDraw, ImageFont
+except ImportError:
+    print("Error: Pillow not installed. Run: pip install Pillow", file=sys.stderr)
+    sys.exit(1)

+try:
+    import markdown
+except ImportError:
+    print("Error: markdown not installed. Run: pip install markdown", file=sys.stderr)
+    sys.exit(1)
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    print("Error: beautifulsoup4 not installed. Run: pip install beautifulsoup4", file=sys.stderr)
+    sys.exit(1)
+
+
+# ============================================================
+# 字体查找
+# ============================================================
+
+def _find_font():
+    """Return the path of the first installed font suitable for CJK text.
+
+    Probes a fixed, platform-specific list of well-known font file
+    locations (selected via ``sys.platform``) and returns the first path
+    that exists on disk.
+
+    On Linux, CJK-capable fonts are probed first and DejaVu Sans last:
+    DejaVu Sans has no CJK glyphs, so it must only be used when no CJK font
+    is installed, never in preference to one.
+
+    Returns:
+        The font file path as a string, or ``None`` when none of the
+        candidate files exists.
+    """
+    if sys.platform == "win32":
+        fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
+        candidates = [
+            os.path.join(fonts_dir, "msyh.ttc"),
+            os.path.join(fonts_dir, "msyhbd.ttc"),
+            os.path.join(fonts_dir, "simhei.ttf"),
+            os.path.join(fonts_dir, "simsun.ttc"),
+        ]
+    elif sys.platform == "darwin":
+        candidates = [
+            "/System/Library/Fonts/PingFang.ttc",
+            "/System/Library/Fonts/STHeiti Light.ttc",
+            "/Library/Fonts/Arial Unicode.ttf",
+        ]
+    else:
+        candidates = [
+            # CJK-capable fonts first.
+            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
+            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
+            "/usr/share/fonts/opentype/noto/NotoSansSC-Regular.otf",
+            # Last resort: Latin-only coverage, but still a scalable font.
+            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+        ]
+    for path in candidates:
+        if os.path.exists(path):
+            return path
+    return None
+
+
+# ============================================================
+# Markdown 转 PNG
+# ============================================================

 def markdown_to_png(md_text, img_path):
-    """将 Markdown 渲染为 PNG 图片"""
-    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
+    """Render Markdown text to a PNG file using Pillow only (no browser).
+
+    The text is converted to HTML with the ``markdown`` library, parsed with
+    BeautifulSoup, and every top-level element (headings, paragraphs, code
+    blocks, block quotes, lists, rules and tables) is drawn onto an
+    800-pixel-wide canvas. Elements of any other kind are skipped.
+
+    The canvas height is obtained by running the renderer once against a
+    scratch canvas, so the measured height cannot drift from what is
+    actually drawn.
+
+    Args:
+        md_text: Markdown source text.
+        img_path: Destination path of the PNG file. Missing parent
+            directories are created.
+
+    Side effects:
+        Writes the PNG file and prints a one-line confirmation to stdout.
+    """
     
-    m2i = Markdown2Image()
-    m2i.b64_decode_and_dump(markdown_text=md_text, output_path=img_path)
+    font_path = _find_font()
+
+    # Load one font per text role. Loading is deliberately best-effort: if no
+    # font file was found, or it cannot be loaded, fall back to Pillow's
+    # built-in font so that the conversion still produces an image.
+    try:
+        if font_path:
+            font_h1 = ImageFont.truetype(font_path, 24)
+            font_h2 = ImageFont.truetype(font_path, 20)
+            font_h3 = ImageFont.truetype(font_path, 18)
+            font_body = ImageFont.truetype(font_path, 15)
+            font_code = ImageFont.truetype(font_path, 13)
+            font_small = ImageFont.truetype(font_path, 12)
+        else:
+            font_h1 = font_h2 = font_h3 = font_body = font_code = font_small = ImageFont.load_default()
+    except Exception:
+        font_h1 = font_h2 = font_h3 = font_body = font_code = font_small = ImageFont.load_default()
+
+    W = 800
+    PAD = 40
+    CONTENT_W = W - PAD * 2
+    content_top = 60          # height of the coloured header band
+    line_height = 26          # vertical step of body text
+    x = PAD                   # left edge of all content
+    # Code lines are truncated, not wrapped (roughly 7 px per character).
+    code_max_chars = (CONTENT_W - 24) // 7
+
+    # tag -> (font, vertical step per line, gap after the heading).
+    # h4-h6 reuse the h3 style so that they are not silently dropped.
+    heading_styles = {
+        'h1': (font_h1, 40, 15),
+        'h2': (font_h2, 36, 12),
+        'h3': (font_h3, 32, 10),
+    }
+    for tag_name in ('h4', 'h5', 'h6'):
+        heading_styles[tag_name] = heading_styles['h3']
+
+    # Convert Markdown to HTML, then parse the HTML with BeautifulSoup.
+    md = markdown.Markdown(extensions=['tables', 'fenced_code', 'codehilite', 'nl2br'])
+    html = md.convert(md_text)
+    soup = BeautifulSoup(html, 'html.parser')
+
+    # Use <body> when the parser produced one, otherwise the document root.
+    root = soup.body if soup.body else soup
+
+    # Scratch canvas: used for text measurement and for the dry-run pass.
+    scratch = ImageDraw.Draw(Image.new('RGB', (W, 100)))
+
+    def measure_text(text, font):
+        """Return the rendered pixel width of ``text`` in ``font``."""
+        bbox = scratch.textbbox((0, 0), text, font=font)
+        return bbox[2] - bbox[0]
+
+    def wrap_text(text, font, max_width):
+        """Split ``text`` into lines no wider than ``max_width`` pixels.
+
+        Breaks happen between arbitrary characters (suits CJK text);
+        explicit newlines are kept and blank paragraphs become ''.
+        """
+        lines = []
+        for paragraph in text.split('\n'):
+            if not paragraph.strip():
+                lines.append('')
+                continue
+            current = ''
+            for ch in paragraph:
+                test = current + ch
+                if measure_text(test, font) > max_width and current:
+                    lines.append(current)
+                    current = ch
+                else:
+                    current = test
+            if current:
+                lines.append(current)
+        return lines
+
+    def draw_lines(canvas, lines, left, top, font, fill, step):
+        """Draw ``lines`` downwards from ``top``; return the y below them."""
+        for line in lines:
+            canvas.text((left, top), line, fill=fill, font=font)
+            top += step
+        return top
+
+    def render_elem(elem, canvas, cy):
+        """Draw one top-level element at ``cy``; return the y below it."""
+        tag = elem.name
+
+        if tag in heading_styles:
+            font, step, gap = heading_styles[tag]
+            lines = wrap_text(elem.get_text(), font, CONTENT_W)
+            return draw_lines(canvas, lines, x, cy, font, '#1a1a2e', step) + gap
+
+        if tag == 'p':
+            lines = wrap_text(elem.get_text(), font_body, CONTENT_W - 20)
+            return draw_lines(canvas, lines, x, cy, font_body, '#374151', line_height) + 8
+
+        if tag in ('strong', 'b'):
+            lines = wrap_text(elem.get_text(), font_body, CONTENT_W - 20)
+            return draw_lines(canvas, lines, x, cy, font_body, '#1a1a2e', line_height)
+
+        if tag in ('em', 'i'):
+            lines = wrap_text(elem.get_text(), font_body, CONTENT_W - 20)
+            return draw_lines(canvas, lines, x, cy, font_body, '#666666', line_height)
+
+        if tag == 'code' and not elem.find_all(recursive=False):
+            # Stray inline code at the top level: draw it on its own line
+            # instead of shifting the left edge of everything that follows.
+            canvas.text((x, cy), elem.get_text(), fill='#333333', font=font_code)
+            return cy + line_height
+
+        if tag == 'pre' or (tag == 'div' and 'codehilite' in elem.get('class', [])):
+            # Code block: rounded grey box with one truncated line per row.
+            text = elem.get_text()
+            lines = text.split('\n') if text else ['']
+            code_h = len(lines) * 20 + 20
+            canvas.rounded_rectangle(
+                [x, cy, x + CONTENT_W, cy + code_h],
+                radius=8,
+                fill='#f4f4f4',
+                outline='#e0e0e0',
+                width=1,
+            )
+            for i, line in enumerate(lines):
+                canvas.text((x + 12, cy + 10 + i * 20), line[:code_max_chars], fill='#333333', font=font_code)
+            return cy + code_h + 15
+
+        if tag == 'blockquote':
+            lines = wrap_text(elem.get_text(), font_body, CONTENT_W - 30)
+            # Vertical accent bar spanning the quoted lines.
+            canvas.rectangle([x, cy, x + 3, cy + len(lines) * line_height], fill='#0066cc')
+            return draw_lines(canvas, lines, x + 15, cy, font_body, '#666666', line_height) + 10
+
+        if tag == 'ul':
+            for li in elem.find_all('li', recursive=False):
+                canvas.text((x, cy), '•', fill='#0066cc', font=font_body)
+                # Wrap long items so that they stay inside the content area.
+                lines = wrap_text(li.get_text().strip(), font_body, CONTENT_W - 20)
+                cy = draw_lines(canvas, lines, x + 16, cy, font_body, '#374151', line_height)
+            return cy + 8
+
+        if tag == 'ol':
+            for i, li in enumerate(elem.find_all('li', recursive=False), 1):
+                canvas.text((x, cy), f'{i}.', fill='#0066cc', font=font_body)
+                lines = wrap_text(li.get_text().strip(), font_body, CONTENT_W - 20)
+                cy = draw_lines(canvas, lines, x + 20, cy, font_body, '#374151', line_height)
+            return cy + 8
+
+        if tag == 'hr':
+            canvas.line([(x, cy), (x + CONTENT_W, cy)], fill='#e0e0e0', width=1)
+            return cy + 30
+
+        if tag == 'table':
+            for row in elem.find_all('tr'):
+                cells = row.find_all(['td', 'th'])
+                if cells:
+                    cell_x = x
+                    cell_w = CONTENT_W // len(cells)
+                    for cell in cells:
+                        # Cell text is cut to 15 characters to fit the column.
+                        cell_text = cell.get_text().strip()[:15]
+                        canvas.rectangle([cell_x, cy, cell_x + cell_w, cy + 32], outline='#ddd')
+                        canvas.text((cell_x + 8, cy + 6), cell_text, fill='#333', font=font_small)
+                        cell_x += cell_w
+                    cy += 36
+            return cy
+
+        if tag == 'br':
+            return cy + line_height
+
+        # Unsupported element: nothing drawn, no vertical space used.
+        return cy
+
+    def render_all(canvas):
+        """Draw every top-level element; return the y below the last one."""
+        cy = content_top + PAD
+        for elem in root.children:
+            # Skip bare strings between elements; only tags have a name.
+            if getattr(elem, 'name', None):
+                cy = render_elem(elem, canvas, cy)
+        return cy
+
+    # Dry run on the scratch canvas to learn how tall the content is.
+    content_bottom = render_all(scratch)
+    TOTAL_H = max(400, content_bottom + PAD)
+
+    # White page; the header band is painted over its top rows below.
+    img = Image.new('RGB', (W, TOTAL_H), '#ffffff')
+    draw = ImageDraw.Draw(img)
+
+    # Header band: vertical blue gradient.
+    for row in range(content_top):
+        ratio = row / content_top
+        r = int(42 + (61 - 42) * ratio)
+        g = int(98 + (133 - 98) * ratio)
+        b = int(239 + (255 - 239) * ratio)
+        draw.line([(0, row), (W, row)], fill=(r, g, b))
+
+    # Real pass: draw the content onto the final image.
+    render_all(draw)
+
+    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
+    img.save(img_path, 'PNG')
+    print(f'Converted: Markdown -> {img_path}')


 def main():
@ -34,8 +371,6 @@ def main():
    md_text = Path(input_path).read_text(encoding="utf-8")
    markdown_to_png(md_text, output_path)

-    print(f"Converted: {input_path} -> {output_path}")
-

 if __name__ == "__main__":
    main()