diff --git a/markdown-converter/SKILL.md b/markdown-converter/SKILL.md
index 47bfa41..74ff984 100644
--- a/markdown-converter/SKILL.md
+++ b/markdown-converter/SKILL.md
@@ -1,30 +1,31 @@
 ---
 name: markdown-converter
-description: A simple tool to convert Markdown to PNG images using browser engine.
+description: A simple tool to convert Markdown to PNG images using markdown library + BeautifulSoup + Pillow.
 metadata: {"clawdbot":{"emoji":"🖼️","os":["linux","darwin","win32"]}}
 ---
 
 # Markdown to PNG Converter
 
-A simple tool to convert Markdown documents to PNG images using Chromium browser engine.
+A simple tool to convert Markdown documents to PNG images using the markdown library, BeautifulSoup, and Pillow.
 
 ## Features
 
-- **Browser-based rendering**: High quality output using real browser engine
-- **Full CSS support**: Supports complex Markdown with tables, code blocks, etc.
-- **Code highlighting**: Syntax highlighting for code blocks
+- **Extended Markdown support**: Tables, fenced code blocks, and hard line breaks via the markdown library
+- **BeautifulSoup parsing**: Robust HTML parsing
+- **Pure Pillow rendering**: No browser required
 - **CJK support**: Uses system fonts for Chinese character rendering
 
 ## Installation
 
 ```bash
-pip install markdown2image playwright && playwright install chromium
+pip install Pillow markdown beautifulsoup4
 ```
 
 ## Dependencies
 
-- **markdown2image**: Markdown to image conversion
-- **playwright**: Browser automation (Chromium)
+- **Pillow**: Image processing and drawing
+- **markdown**: Python Markdown parser (tables, fenced code, codehilite, nl2br extensions)
+- **beautifulsoup4**: HTML parsing
 
 ## Usage
 
@@ -35,11 +36,12 @@ python scripts/md_convert.py input.md output.png
 
 ## Supported Platforms
 
-- **Windows**: Uses Chromium browser
-- **macOS**: Uses Chromium browser
-- **Linux**: Uses Chromium browser
+- **Windows**: Uses system CJK fonts
+- **macOS**: Uses system CJK fonts
+- **Linux**: Uses system CJK fonts
 
 ## Notes
 
-- Requires Chromium browser (installed via `playwright install chromium`)
+- Uses the markdown library for robust Markdown parsing
+- BeautifulSoup handles complex HTML structures
 - Chinese fonts are supported via system fonts
diff --git a/markdown-converter/scripts/md_convert.py b/markdown-converter/scripts/md_convert.py
index b7a9a17..f739bfb 100644
--- a/markdown-converter/scripts/md_convert.py
+++ b/markdown-converter/scripts/md_convert.py
@@ -1,22 +1,349 @@
-"""Markdown to PNG converter
+"""Markdown to PNG converter (markdown library + BeautifulSoup + Pillow).
 
 Dependencies:
-    - pip install markdown2image playwright && playwright install chromium
+    - pip install Pillow markdown beautifulsoup4
+
+Features:
+    - Parses Markdown with the markdown library (tables, fenced code, nl2br)
+    - Walks the generated HTML with BeautifulSoup
+    - Renders with Pillow only; no browser required
 """
 
-import os
 import sys
+import os
+import re
 from pathlib import Path
 
-from markdown2image import Markdown2Image
+try:
+    from PIL import Image, ImageDraw, ImageFont
+except ImportError:
+    print("Error: Pillow not installed. Run: pip install Pillow", file=sys.stderr)
+    sys.exit(1)
 
+try:
+    import markdown
+except ImportError:
+    print("Error: markdown not installed. Run: pip install markdown", file=sys.stderr)
+    sys.exit(1)
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    print("Error: beautifulsoup4 not installed. Run: pip install beautifulsoup4", file=sys.stderr)
+    sys.exit(1)
+
+
+# ============================================================
+# Layout constants (pixels)
+# ============================================================
+
+_WIDTH = 800
+_PAD = 40
+_CONTENT_W = _WIDTH - _PAD * 2
+_HEADER_H = 60
+_MIN_HEIGHT = 400
+_LINE_H = 26
+_CODE_LINE_H = 20
+_ROW_H = 36
+
+# Font role -> size passed to ImageFont.truetype.
+_FONT_SIZES = {'h1': 24, 'h2': 20, 'h3': 18, 'body': 15, 'code': 13, 'small': 12}
+
+# tag -> (font role, line step, gap after the block)
+_HEADINGS = {
+    'h1': ('h1', 40, 15),
+    'h2': ('h2', 36, 12),
+    'h3': ('h3', 32, 10),
+    'h4': ('h3', 32, 10),
+    'h5': ('h3', 32, 10),
+    'h6': ('h3', 32, 10),
+}
+
+# tag -> (font role, text colour, gap after the block)
+_TEXT_BLOCKS = {
+    'p': ('body', '#374151', 8),
+    'strong': ('body', '#1a1a2e', 0),
+    'b': ('body', '#1a1a2e', 0),
+    'em': ('body', '#666666', 0),
+    'i': ('body', '#666666', 0),
+    'code': ('code', '#333333', 0),
+}
+
+
+# ============================================================
+# Font lookup
+# ============================================================
+
+def _find_font():
+    """Return the path of the first installed candidate font, or None.
+
+    Candidates are probed in order for the current ``sys.platform``. On
+    Linux the CJK fonts come before DejaVu Sans, which has no CJK glyphs,
+    so Chinese text is not drawn as empty boxes when both are installed.
+    """
+    if sys.platform == "win32":
+        fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
+        candidates = [
+            os.path.join(fonts_dir, "msyh.ttc"),
+            os.path.join(fonts_dir, "msyhbd.ttc"),
+            os.path.join(fonts_dir, "simhei.ttf"),
+            os.path.join(fonts_dir, "simsun.ttc"),
+        ]
+    elif sys.platform == "darwin":
+        candidates = [
+            "/System/Library/Fonts/PingFang.ttc",
+            "/System/Library/Fonts/STHeiti Light.ttc",
+            "/Library/Fonts/Arial Unicode.ttf",
+        ]
+    else:
+        candidates = [
+            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
+            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
+            "/usr/share/fonts/opentype/noto/NotoSansSC-Regular.otf",
+            # Latin-only last resort.
+            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+        ]
+    return next((path for path in candidates if os.path.isfile(path)), None)
+
+
+def _load_fonts():
+    """Return a dict mapping each font role to a loaded font.
+
+    Falls back to Pillow's built-in font when no candidate font is found
+    or the one found cannot be loaded.
+    """
+    font_path = _find_font()
+    if font_path:
+        try:
+            return {
+                role: ImageFont.truetype(font_path, size)
+                for role, size in _FONT_SIZES.items()
+            }
+        except (OSError, ImportError):
+            # Unreadable font file, or Pillow built without FreeType.
+            pass
+    return dict.fromkeys(_FONT_SIZES, ImageFont.load_default())
+
+
+# ============================================================
+# Text measurement
+# ============================================================
+
+def _text_width(draw, text, font):
+    """Return the pixel width of *text* when drawn with *font*."""
+    left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
+    return right - left
+
+
+def _wrap_text(draw, text, font, max_width):
+    """Split *text* into lines at most *max_width* pixels wide.
+
+    Wrapping is per character, not per word, so text without spaces (such
+    as Chinese) wraps too. Newlines in *text* always start a new line and
+    blank input lines are kept as empty strings.
+    """
+    lines = []
+    for paragraph in text.split('\n'):
+        if not paragraph.strip():
+            lines.append('')
+            continue
+        current = ''
+        for ch in paragraph:
+            candidate = current + ch
+            if current and _text_width(draw, candidate, font) > max_width:
+                lines.append(current)
+                current = ch
+            else:
+                current = candidate
+        lines.append(current)
+    return lines
+
+
+def _fit_text(draw, text, font, max_width):
+    """Drop trailing characters from *text* until it fits *max_width*."""
+    while text and _text_width(draw, text, font) > max_width:
+        text = text[:-1]
+    return text
+
+
+# ============================================================
+# Layout
+# ============================================================
+
+def _is_code_block(elem):
+    """Return True for a pre element or a codehilite wrapper div."""
+    return elem.name == 'pre' or (
+        elem.name == 'div' and 'codehilite' in elem.get('class', [])
+    )
+
+
+def _layout(draw, elements, fonts, top, paint):
+    """Lay out block elements downwards from y = *top*; return the final y.
+
+    The same routine measures (``paint=False``) and draws (``paint=True``),
+    so the height found by the measuring pass is exactly what the drawing
+    pass uses. Tags that are not handled below take up no space.
+    """
+    x = _PAD
+    y = top
+    body = fonts['body']
+
+    def put_lines(lines, font, fill, step, indent=0):
+        """Draw *lines* when painting and advance y by *step* per line."""
+        nonlocal y
+        for line in lines:
+            if paint:
+                draw.text((x + indent, y), line, fill=fill, font=font)
+            y += step
+
+    for elem in elements:
+        tag = getattr(elem, 'name', None)
+        if not tag:
+            continue  # text node between blocks
+
+        if tag in _HEADINGS:
+            role, step, gap = _HEADINGS[tag]
+            lines = _wrap_text(draw, elem.get_text(), fonts[role], _CONTENT_W)
+            put_lines(lines, fonts[role], '#1a1a2e', step)
+            y += gap
+
+        elif tag in _TEXT_BLOCKS:
+            role, fill, gap = _TEXT_BLOCKS[tag]
+            lines = _wrap_text(draw, elem.get_text(), fonts[role], _CONTENT_W - 20)
+            put_lines(lines, fonts[role], fill, _LINE_H)
+            y += gap
+
+        elif _is_code_block(elem):
+            # The parser leaves a trailing newline inside the block; strip it
+            # so the box does not end with an empty line.
+            code_lines = elem.get_text().rstrip('\n').expandtabs(4).split('\n')
+            box_h = len(code_lines) * _CODE_LINE_H + 20
+            if paint:
+                draw.rounded_rectangle(
+                    [x, y, x + _CONTENT_W, y + box_h],
+                    radius=8,
+                    fill='#f4f4f4',
+                    outline='#e0e0e0',
+                    width=1,
+                )
+                for row, line in enumerate(code_lines):
+                    # Code is not wrapped: long lines are cut at the box edge.
+                    shown = _fit_text(draw, line, fonts['code'], _CONTENT_W - 24)
+                    draw.text(
+                        (x + 12, y + 10 + row * _CODE_LINE_H),
+                        shown,
+                        fill='#333333',
+                        font=fonts['code'],
+                    )
+            y += box_h + 15
+
+        elif tag == 'blockquote':
+            lines = _wrap_text(draw, elem.get_text().strip(), body, _CONTENT_W - 30)
+            if paint:
+                # Accent bar down the left side of the quote.
+                bar_bottom = y + len(lines) * _LINE_H
+                draw.rectangle([x, y, x + 3, bar_bottom], fill='#0066cc')
+            put_lines(lines, body, '#666666', _LINE_H, indent=15)
+            y += 15
+
+        elif tag in ('ul', 'ol'):
+            items = elem.find_all('li', recursive=False)
+            if tag == 'ul':
+                indent = 16
+            else:
+                # Leave room for the widest number ("10." and up).
+                indent = max(20, _text_width(draw, f'{len(items)}.', body) + 6)
+            for number, item in enumerate(items, 1):
+                if paint:
+                    marker = '•' if tag == 'ul' else f'{number}.'
+                    draw.text((x, y), marker, fill='#0066cc', font=body)
+                text = item.get_text().strip()
+                lines = _wrap_text(draw, text, body, _CONTENT_W - indent)
+                put_lines(lines, body, '#374151', _LINE_H, indent=indent)
+                y += 4
+            y += 8
+
+        elif tag == 'hr':
+            if paint:
+                draw.line([(x, y), (x + _CONTENT_W, y)], fill='#e0e0e0', width=1)
+            y += 30
+
+        elif tag == 'table':
+            for tr in elem.find_all('tr'):
+                cells = tr.find_all(['td', 'th'])
+                if not cells:
+                    continue
+                if paint:
+                    cell_w = _CONTENT_W // len(cells)
+                    for col, cell in enumerate(cells):
+                        cell_x = x + col * cell_w
+                        # Collapse whitespace so each cell is a single line.
+                        label = ' '.join(cell.get_text().split())
+                        label = _fit_text(draw, label, fonts['small'], cell_w - 16)
+                        draw.rectangle(
+                            [cell_x, y, cell_x + cell_w, y + 32], outline='#ddd',
+                        )
+                        draw.text(
+                            (cell_x + 8, y + 6), label, fill='#333', font=fonts['small'],
+                        )
+                y += _ROW_H
+            y += 20
+
+        elif tag == 'br':
+            y += _LINE_H
+
+    return y
+
+
+# ============================================================
+# Markdown to PNG
+# ============================================================
 
 def markdown_to_png(md_text, img_path):
-    """将 Markdown 渲染为 PNG 图片"""
-    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
+    """Render Markdown text to a PNG file.
+
+    Args:
+        md_text: Markdown source text.
+        img_path: Output PNG path; missing parent directories are created.
+
+    The image is 800 px wide and at least 400 px tall. Inline formatting is
+    flattened to plain text, and only headings, paragraphs, code blocks,
+    block quotes, lists, rules and tables are drawn.
+    """
+    fonts = _load_fonts()
 
-    m2i = Markdown2Image()
-    m2i.b64_decode_and_dump(markdown_text=md_text, output_path=img_path)
+    converter = markdown.Markdown(
+        extensions=['tables', 'fenced_code', 'codehilite', 'nl2br'],
+    )
+    soup = BeautifulSoup(converter.convert(md_text), 'html.parser')
+    root = soup.body if soup.body else soup
+    blocks = list(root.children)
+    content_top = _HEADER_H + _PAD
+
+    # Pass 1: measure on a scratch canvas to learn the required height.
+    scratch = ImageDraw.Draw(Image.new('RGB', (_WIDTH, 1)))
+    bottom = _layout(scratch, blocks, fonts, content_top, paint=False)
+    total_h = max(_MIN_HEIGHT, bottom + _PAD)
+
+    # White page with a blue gradient banner across the top.
+    img = Image.new('RGB', (_WIDTH, total_h), '#ffffff')
+    draw = ImageDraw.Draw(img)
+    for row in range(_HEADER_H):
+        ratio = row / _HEADER_H
+        banner = (
+            int(42 + (61 - 42) * ratio),
+            int(98 + (133 - 98) * ratio),
+            int(239 + (255 - 239) * ratio),
+        )
+        draw.line([(0, row), (_WIDTH, row)], fill=banner)
+
+    # Pass 2: draw for real with the same layout routine.
+    _layout(draw, blocks, fonts, content_top, paint=True)
+
+    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
+    img.save(img_path, 'PNG')
+    print(f'Converted: Markdown -> {img_path}')
 
 
 def main():
@@ -34,8 +361,6 @@ def main():
     md_text = Path(input_path).read_text(encoding="utf-8")
     markdown_to_png(md_text, output_path)
 
-    print(f"Converted: {input_path} -> {output_path}")
-
 
 if __name__ == "__main__":
     main()