feat:修改实现方式
This commit is contained in:
parent
6fc0be476f
commit
55c175d7a7
|
|
@ -1,30 +1,31 @@
|
||||||
---
|
---
|
||||||
name: markdown-converter
|
name: markdown-converter
|
||||||
description: A simple tool to convert Markdown to PNG images using browser engine.
|
description: A simple tool to convert Markdown to PNG images using markdown library + BeautifulSoup + Pillow.
|
||||||
metadata: {"clawdbot":{"emoji":"🖼️","os":["linux","darwin","win32"]}}
|
metadata: {"clawdbot":{"emoji":"🖼️","os":["linux","darwin","win32"]}}
|
||||||
---
|
---
|
||||||
|
|
||||||
# Markdown to PNG Converter
|
# Markdown to PNG Converter
|
||||||
|
|
||||||
A simple tool to convert Markdown documents to PNG images using Chromium browser engine.
|
A simple tool to convert Markdown documents to PNG images using the Python `markdown` library, BeautifulSoup, and Pillow.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Browser-based rendering**: High quality output using real browser engine
|
- **Common GFM syntax**: Tables, fenced code blocks, and hard line breaks via the `markdown` library
|
||||||
- **Full CSS support**: Supports complex Markdown with tables, code blocks, etc.
|
- **BeautifulSoup parsing**: Robust HTML parsing
|
||||||
- **Code highlighting**: Syntax highlighting for code blocks
|
- **Pure Pillow rendering**: No browser required
|
||||||
- **CJK support**: Uses system fonts for Chinese character rendering
|
- **CJK support**: Uses system fonts for Chinese character rendering
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install markdown2image playwright && playwright install chromium
|
pip install Pillow markdown beautifulsoup4
|
||||||
```
|
```
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
- **markdown2image**: Markdown to image conversion
|
- **Pillow**: Image processing and drawing
|
||||||
- **playwright**: Browser automation (Chromium)
|
- **markdown**: Python Markdown parser with GFM extensions
|
||||||
|
- **beautifulsoup4**: HTML parsing
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
|
@ -35,11 +36,12 @@ python scripts/md_convert.py input.md output.png
|
||||||
|
|
||||||
## Supported Platforms
|
## Supported Platforms
|
||||||
|
|
||||||
- **Windows**: Uses Chromium browser
|
- **Windows**: Uses system CJK fonts
|
||||||
- **macOS**: Uses Chromium browser
|
- **macOS**: Uses system CJK fonts
|
||||||
- **Linux**: Uses Chromium browser
|
- **Linux**: Uses system CJK fonts
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- Requires Chromium browser (installed via `playwright install chromium`)
|
- Uses the `markdown` library for robust Markdown parsing
|
||||||
|
- BeautifulSoup handles complex HTML structures
|
||||||
- Chinese fonts are supported via system fonts
|
- Chinese fonts are supported via system fonts
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,359 @@
|
||||||
"""Markdown to PNG converter
|
"""Markdown 转 PNG 图片工具(使用 markdown 库 + BeautifulSoup + Pillow)
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
- pip install markdown2image playwright && playwright install chromium
|
- pip install Pillow markdown beautifulsoup4
|
||||||
|
|
||||||
|
特点:
|
||||||
|
- 使用 markdown 库解析,支持完整 GFM
|
||||||
|
- 使用 BeautifulSoup 解析 HTML
|
||||||
|
- 纯 Pillow 渲染,无需浏览器
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from markdown2image import Markdown2Image
|
try:
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
except ImportError:
|
||||||
|
print("Error: Pillow not installed. Run: pip install Pillow", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import markdown
|
||||||
|
except ImportError:
|
||||||
|
print("Error: markdown not installed. Run: pip install markdown", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
except ImportError:
|
||||||
|
print("Error: beautifulsoup4 not installed. Run: pip install beautifulsoup4", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 字体查找
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
def _find_font(extra_candidates=None):
    """Return the path of the first usable TrueType/OpenType font file.

    Candidates are tried in order: first any paths given in
    ``extra_candidates``, then a built-in list chosen by ``sys.platform``.
    Fonts with CJK coverage are listed before Latin-only fallbacks so that
    Chinese text renders whenever a CJK font is installed.

    Args:
        extra_candidates: Optional iterable of font paths (``str`` or
            ``os.PathLike``) to try before the platform defaults.

    Returns:
        The path of the first candidate that is an existing regular file,
        as a ``str``, or ``None`` when no candidate exists.
    """
    candidates = list(extra_candidates) if extra_candidates else []

    if sys.platform == "win32":
        fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
        candidates += [
            os.path.join(fonts_dir, "msyh.ttc"),
            os.path.join(fonts_dir, "msyhbd.ttc"),
            os.path.join(fonts_dir, "simhei.ttf"),
            os.path.join(fonts_dir, "simsun.ttc"),
        ]
    elif sys.platform == "darwin":
        candidates += [
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Light.ttc",
            "/System/Library/Fonts/Hiragino Sans GB.ttc",
            "/Library/Fonts/Arial Unicode.ttf",
            "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",
        ]
    else:
        candidates += [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansSC-Regular.otf",
            # Latin-only last resort: kept after every CJK font so it never
            # shadows an installed font that can draw Chinese characters.
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        ]

    for path in candidates:
        # isfile() rather than exists(): a directory is not a loadable font.
        if os.path.isfile(path):
            return os.fspath(path)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Markdown 转 PNG
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
# Layout constants shared by markdown_to_png and its private helpers.
_IMG_W = 800                      # output image width in pixels
_PAD = 40                         # padding around the content column
_CONTENT_W = _IMG_W - 2 * _PAD    # width of the content column
_BANNER_H = 60                    # height of the blue gradient banner
_LINE_H = 26                      # line pitch for body text
_CODE_LINE_H = 20                 # line pitch inside code blocks
_ROW_H = 36                       # vertical pitch of table rows

# tag -> (font role, line pitch, gap after the heading).
# h4-h6 reuse the h3 style so that deeper headings are not dropped.
_HEADING_STYLE = {
    'h1': ('h1', 40, 15),
    'h2': ('h2', 36, 12),
    'h3': ('h3', 32, 10),
    'h4': ('h3', 32, 10),
    'h5': ('h3', 32, 10),
    'h6': ('h3', 32, 10),
}

# Inline tags that appear directly under the root: tag -> (font role, colour).
_INLINE_STYLE = {
    'strong': ('body', '#1a1a2e'),
    'b': ('body', '#1a1a2e'),
    'em': ('body', '#666666'),
    'i': ('body', '#666666'),
    'code': ('code', '#333333'),
}


def _load_fonts(font_path):
    """Return a dict mapping font role to a font object for each text size.

    Falls back to Pillow's built-in default font when ``font_path`` is
    falsy or the file cannot be loaded; a load failure is reported on stderr.
    """
    sizes = {'h1': 24, 'h2': 20, 'h3': 18, 'body': 15, 'code': 13, 'small': 12}
    if font_path:
        try:
            return {role: ImageFont.truetype(font_path, size)
                    for role, size in sizes.items()}
        except (OSError, ImportError) as exc:
            print(f"Warning: cannot load font {font_path} ({exc}); "
                  "falling back to Pillow's default font", file=sys.stderr)
    return dict.fromkeys(sizes, ImageFont.load_default())


def _text_width(draw, text, font):
    """Return the rendered width of ``text`` in pixels."""
    left, _, right, _ = draw.textbbox((0, 0), text, font=font)
    return right - left


def _wrap_text(draw, text, font, max_width):
    """Split ``text`` into lines no wider than ``max_width`` pixels.

    Wrapping is per character (suitable for CJK text). Existing newlines are
    kept as line breaks and blank lines are preserved as empty strings.
    """
    lines = []
    for paragraph in text.split('\n'):
        if not paragraph.strip():
            lines.append('')
            continue
        current = ''
        for ch in paragraph:
            candidate = current + ch
            # "and current" guarantees progress even if one glyph is too wide.
            if current and _text_width(draw, candidate, font) > max_width:
                lines.append(current)
                current = ch
            else:
                current = candidate
        if current:
            lines.append(current)
    return lines


def _fit_text(draw, text, font, max_width):
    """Return ``text`` shortened with a trailing '...' so it fits ``max_width``."""
    if _text_width(draw, text, font) <= max_width:
        return text
    # ASCII dots keep this safe for fonts without an ellipsis glyph.
    while text and _text_width(draw, text + '...', font) > max_width:
        text = text[:-1]
    return text + '...'


def _draw_lines(draw, lines, x, y, font, fill, pitch):
    """Draw ``lines`` top-down starting at ``(x, y)``; return the y below them."""
    for line in lines:
        draw.text((x, y), line, fill=fill, font=font)
        y += pitch
    return y


def _split_list_item(li):
    """Return ``(own_text, nested_lists)`` for one ``<li>`` element."""
    from bs4 import Comment  # bs4 is already a dependency of this module

    parts, nested = [], []
    for child in li.children:
        name = getattr(child, 'name', None)
        if name in ('ul', 'ol'):
            nested.append(child)
        elif name:
            parts.append(child.get_text())
        elif not isinstance(child, Comment):
            parts.append(str(child))
    return ''.join(parts).strip(), nested


def _render_list(draw, list_elem, fonts, x, y):
    """Draw a ``<ul>``/``<ol>`` (recursing into nested lists); return the new y."""
    ordered = list_elem.name == 'ol'
    font = fonts['body']
    text_x = x + (20 if ordered else 16)
    # Same 20 px right-hand margin as paragraphs, measured from the text start.
    wrap_w = max(_PAD + _CONTENT_W - 20 - text_x, 1)
    for number, li in enumerate(list_elem.find_all('li', recursive=False), 1):
        text, nested = _split_list_item(li)
        marker = f'{number}.' if ordered else '•'
        draw.text((x, y), marker, fill='#0066cc', font=font)
        lines = _wrap_text(draw, text, font, wrap_w)
        y = _draw_lines(draw, lines, text_x, y, font, '#374151', _LINE_H)
        for sub in nested:
            y = _render_list(draw, sub, fonts, text_x, y)
    return y


def _render_blocks(draw, root, fonts, top):
    """Draw every top-level element of ``root`` onto ``draw``.

    Layout starts at y = ``top``. The return value is the y coordinate just
    below the last element, so the same routine serves both to measure the
    document (on a scratch canvas) and to paint it.
    """
    x = _PAD
    y = top
    body = fonts['body']
    for elem in root.children:
        tag = getattr(elem, 'name', None)
        if not tag:
            continue  # bare text nodes between blocks

        if tag in _HEADING_STYLE:
            role, pitch, gap = _HEADING_STYLE[tag]
            lines = _wrap_text(draw, elem.get_text(), fonts[role], _CONTENT_W)
            y = _draw_lines(draw, lines, x, y, fonts[role], '#1a1a2e', pitch) + gap

        elif tag == 'pre' or (tag == 'div' and 'codehilite' in elem.get('class', [])):
            # Long lines wrap instead of being cut off; the trailing newline
            # is dropped so the box has no empty last row.
            code = elem.get_text().expandtabs(4).rstrip('\n')
            lines = _wrap_text(draw, code, fonts['code'], _CONTENT_W - 24)
            box_h = len(lines) * _CODE_LINE_H + 20
            draw.rounded_rectangle(
                [x, y, x + _CONTENT_W, y + box_h],
                radius=8,
                fill='#f4f4f4',
                outline='#e0e0e0',
                width=1,
            )
            _draw_lines(draw, lines, x + 12, y + 10, fonts['code'], '#333333', _CODE_LINE_H)
            y += box_h + 15

        elif tag in _INLINE_STYLE:
            # Inline element at the top level: laid out as its own lines.
            role, colour = _INLINE_STYLE[tag]
            lines = _wrap_text(draw, elem.get_text(), fonts[role], _CONTENT_W - 20)
            y = _draw_lines(draw, lines, x, y, fonts[role], colour, _LINE_H)

        elif tag == 'blockquote':
            lines = _wrap_text(draw, elem.get_text().strip(), body, _CONTENT_W - 30)
            draw.rectangle([x, y, x + 3, y + len(lines) * _LINE_H], fill='#0066cc')
            y = _draw_lines(draw, lines, x + 15, y, body, '#666666', _LINE_H) + 10

        elif tag in ('ul', 'ol'):
            y = _render_list(draw, elem, fonts, x, y) + 8

        elif tag == 'hr':
            draw.line([(x, y), (x + _CONTENT_W, y)], fill='#e0e0e0', width=1)
            y += 30

        elif tag == 'table':
            for row in elem.find_all('tr'):
                cells = row.find_all(['td', 'th'])
                if not cells:
                    continue
                cell_w = _CONTENT_W // len(cells)
                for idx, cell in enumerate(cells):
                    cell_x = x + idx * cell_w
                    label = _fit_text(draw, ' '.join(cell.get_text().split()),
                                      fonts['small'], cell_w - 16)
                    draw.rectangle([cell_x, y, cell_x + cell_w, y + 32], outline='#ddd')
                    draw.text((cell_x + 8, y + 6), label, fill='#333', font=fonts['small'])
                y += _ROW_H
            y += 20

        elif tag == 'br':
            y += _LINE_H

        elif tag not in ('script', 'style'):
            # <p> and any other block-level tag: render its text as a
            # paragraph rather than silently dropping the content.
            text = elem.get_text()
            if tag == 'p' or text.strip():
                lines = _wrap_text(draw, text, body, _CONTENT_W - 20)
                y = _draw_lines(draw, lines, x, y, body, '#374151', _LINE_H) + 8
    return y


def markdown_to_png(md_text, img_path):
    """Convert Markdown text to a PNG image and save it.

    The Markdown is converted to HTML with the ``markdown`` library, parsed
    with BeautifulSoup, and drawn with Pillow onto an 800 px wide canvas
    with a blue gradient banner at the top. The canvas height is obtained by
    running the layout routine once on a scratch canvas, so the measured
    height always matches what is drawn.

    Args:
        md_text: Markdown source text.
        img_path: Destination path of the PNG file; missing parent
            directories are created.

    Side effects:
        Writes the image file and prints a confirmation line to stdout.
    """
    fonts = _load_fonts(_find_font())

    md = markdown.Markdown(extensions=['tables', 'fenced_code', 'codehilite', 'nl2br'])
    soup = BeautifulSoup(md.convert(md_text), 'html.parser')
    root = soup.body if soup.body else soup

    top = _BANNER_H + _PAD

    # Pass 1: measure. Pillow clips drawing outside the image, so a 1 px high
    # scratch canvas is enough to obtain the final y coordinate.
    scratch_img = Image.new('RGB', (_IMG_W, 1))
    end_y = _render_blocks(ImageDraw.Draw(scratch_img), root, fonts, top)
    total_h = max(400, end_y + _PAD)

    # Pass 2: paint. The canvas starts white; only the banner rows are tinted.
    img = Image.new('RGB', (_IMG_W, total_h), '#ffffff')
    draw = ImageDraw.Draw(img)
    for row in range(_BANNER_H):
        ratio = row / _BANNER_H
        colour = (
            int(42 + (61 - 42) * ratio),
            int(98 + (133 - 98) * ratio),
            int(239 + (255 - 239) * ratio),
        )
        draw.line([(0, row), (_IMG_W, row)], fill=colour)
    _render_blocks(draw, root, fonts, top)

    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
    img.save(img_path, 'PNG')
    print(f'Converted: Markdown -> {img_path}')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -34,8 +371,6 @@ def main():
|
||||||
md_text = Path(input_path).read_text(encoding="utf-8")
|
md_text = Path(input_path).read_text(encoding="utf-8")
|
||||||
markdown_to_png(md_text, output_path)
|
markdown_to_png(md_text, output_path)
|
||||||
|
|
||||||
print(f"Converted: {input_path} -> {output_path}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue