SKILLS/markdown-converter/scripts/md_convert.py

377 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Markdown 转 PNG 图片工具(使用 markdown 库 + BeautifulSoup + Pillow
Dependencies:
- pip install Pillow markdown beautifulsoup4
特点:
- 使用 markdown 库解析,支持完整 GFM
- 使用 BeautifulSoup 解析 HTML
- 纯 Pillow 渲染,无需浏览器
"""
import sys
import os
import re
from pathlib import Path
try:
from PIL import Image, ImageDraw, ImageFont
except ImportError:
print("Error: Pillow not installed. Run: pip install Pillow", file=sys.stderr)
sys.exit(1)
try:
import markdown
except ImportError:
print("Error: markdown not installed. Run: pip install markdown", file=sys.stderr)
sys.exit(1)
try:
from bs4 import BeautifulSoup
except ImportError:
print("Error: beautifulsoup4 not installed. Run: pip install beautifulsoup4", file=sys.stderr)
sys.exit(1)
# ============================================================
# Font lookup
# ============================================================
def _find_font():
    """Return the path of the first installed candidate font, or None.

    Candidates are platform specific (chosen from ``sys.platform``) and are
    ordered so that fonts with CJK coverage are tried first. On Linux the
    Latin-only DejaVu Sans is kept as the last resort: picking it ahead of
    an installed CJK font would render Chinese text as missing-glyph boxes.

    Returns:
        The path of the first candidate that exists as a regular file, or
        None when none of them is installed.
    """
    if sys.platform == "win32":
        fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
        candidates = [
            os.path.join(fonts_dir, file_name)
            for file_name in ("msyh.ttc", "msyhbd.ttc", "simhei.ttf", "simsun.ttc")
        ]
    elif sys.platform == "darwin":
        candidates = [
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Light.ttc",
            "/Library/Fonts/Arial Unicode.ttf",
        ]
    else:
        candidates = [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansSC-Regular.otf",
            # Latin-only fallback; deliberately after every CJK-capable font.
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        ]
    # isfile (not exists) so a directory with a matching name is never returned.
    return next((path for path in candidates if os.path.isfile(path)), None)
# ============================================================
# Markdown to PNG
# ============================================================
def markdown_to_png(md_text, img_path):
    """Render Markdown text to a PNG image using Pillow only.

    The Markdown is converted to HTML with the ``markdown`` library, the
    top-level HTML elements are walked with BeautifulSoup, and each one is
    laid out on an 800-pixel-wide page below a 60-pixel gradient banner.
    Headings (h1-h6), paragraphs, code blocks, block quotes, ordered and
    unordered lists, horizontal rules and tables are drawn; other top-level
    elements are skipped.

    Layout runs exactly once and records a list of drawing operations, so
    the canvas height is derived from the very same positions that are
    painted afterwards and content can never be clipped at the bottom.

    Args:
        md_text: Markdown source text.
        img_path: Destination path of the PNG; missing parent directories
            are created.

    Side effects:
        Writes the image file and prints a confirmation line to stdout.
        Prints a warning to stderr when the font returned by ``_find_font``
        cannot be loaded and Pillow's default font is used instead.
    """
    page_w = 800
    pad = 40
    content_w = page_w - pad * 2
    banner_h = 60
    line_height = 26
    left = pad

    # ---- fonts ---------------------------------------------------------
    font_sizes = {'h1': 24, 'h2': 20, 'h3': 18, 'body': 15, 'code': 13, 'small': 12}
    font_path = _find_font()
    fonts = None
    if font_path:
        try:
            fonts = {
                key: ImageFont.truetype(font_path, size)
                for key, size in font_sizes.items()
            }
        except Exception as exc:  # best effort: any load failure falls back
            print(f"Warning: could not load font {font_path}: {exc}", file=sys.stderr)
            fonts = None
    # The built-in fallback font may not cover U+2022, so use an ASCII marker
    # for bullets whenever no TrueType font could be loaded.
    bullet = '\u2022' if fonts is not None else '-'
    if fonts is None:
        fonts = dict.fromkeys(font_sizes, ImageFont.load_default())

    # ---- Markdown -> HTML -> element tree -------------------------------
    converter = markdown.Markdown(extensions=['tables', 'fenced_code', 'codehilite', 'nl2br'])
    soup = BeautifulSoup(converter.convert(md_text), 'html.parser')
    root = soup.body if soup.body else soup

    # ---- text measuring helpers ----------------------------------------
    # A throw-away 1x1 canvas: only its text metrics are used.
    probe = ImageDraw.Draw(Image.new('RGB', (1, 1)))

    def text_width(text, font):
        """Return the rendered pixel width of ``text`` in ``font``."""
        box = probe.textbbox((0, 0), text, font=font)
        return box[2] - box[0]

    def wrap_text(text, font, max_width):
        """Split ``text`` into lines no wider than ``max_width`` pixels.

        Wrapping is per character (suits CJK text, which has no spaces).
        Explicit newlines are kept and blank lines become ''. The result
        always has at least one entry.
        """
        wrapped = []
        for paragraph in text.split('\n'):
            if not paragraph.strip():
                wrapped.append('')
                continue
            current = ''
            for ch in paragraph:
                candidate = current + ch
                if current and text_width(candidate, font) > max_width:
                    wrapped.append(current)
                    current = ch
                else:
                    current = candidate
            if current:
                wrapped.append(current)
        return wrapped

    def clip_to_width(text, font, max_width):
        """Return the longest prefix of ``text`` that fits in ``max_width``."""
        while text and text_width(text, font) > max_width:
            text = text[:-1]
        return text

    # ---- layout: record drawing operations ------------------------------
    # Each entry is (ImageDraw method name, positional args, keyword args).
    ops = []

    def emit(method, *args, **kwargs):
        ops.append((method, args, kwargs))

    def emit_lines(lines, font, color, top, step, indent=0):
        """Queue one text op per line; return the y just below the last."""
        for line in lines:
            emit('text', (left + indent, top), line, fill=color, font=font)
            top += step
        return top

    # tag -> (font key, line step, gap after); h4-h6 reuse the h3 look so
    # deeper headings are no longer dropped.
    heading_styles = {'h1': ('h1', 40, 15), 'h2': ('h2', 36, 12), 'h3': ('h3', 32, 10)}
    for deep_tag in ('h4', 'h5', 'h6'):
        heading_styles[deep_tag] = heading_styles['h3']
    # Inline tags are normally wrapped in <p> by the markdown library; these
    # styles only matter if one shows up at the top level (e.g. raw HTML).
    inline_styles = {
        'strong': ('body', '#1a1a2e'),
        'b': ('body', '#1a1a2e'),
        'em': ('body', '#666666'),
        'i': ('body', '#666666'),
        'code': ('code', '#333333'),
    }

    def layout(elem, cy):
        """Queue the drawing ops for ``elem`` at ``cy``; return the next y."""
        tag = elem.name
        body = fonts['body']

        if tag in heading_styles:
            font_key, step, gap = heading_styles[tag]
            lines = wrap_text(elem.get_text(), fonts[font_key], content_w)
            return emit_lines(lines, fonts[font_key], '#1a1a2e', cy, step) + gap

        if tag == 'p':
            lines = wrap_text(elem.get_text(), body, content_w - 20)
            return emit_lines(lines, body, '#374151', cy, line_height) + 8

        if tag in inline_styles:
            font_key, color = inline_styles[tag]
            lines = wrap_text(elem.get_text(), fonts[font_key], content_w - 20)
            return emit_lines(lines, fonts[font_key], color, cy, line_height)

        if tag == 'pre' or (tag == 'div' and 'codehilite' in elem.get('class', [])):
            # Drop the trailing newline so the box has no empty last row, and
            # wrap (rather than truncate) long lines so no code is lost.
            code_text = elem.get_text().rstrip('\n')
            lines = wrap_text(code_text, fonts['code'], content_w - 24)
            box_h = len(lines) * 20 + 20
            emit(
                'rounded_rectangle',
                [left, cy, left + content_w, cy + box_h],
                radius=8,
                fill='#f4f4f4',
                outline='#e0e0e0',
                width=1,
            )
            emit_lines(lines, fonts['code'], '#333333', cy + 10, 20, indent=12)
            return cy + box_h + 15

        if tag == 'blockquote':
            # strip(): the nested <p> otherwise adds blank first/last lines.
            lines = wrap_text(elem.get_text().strip(), body, content_w - 30)
            emit('rectangle', [left, cy, left + 3, cy + len(lines) * line_height], fill='#0066cc')
            return emit_lines(lines, body, '#666666', cy, line_height, indent=15) + 10

        if tag in ('ul', 'ol'):
            ordered = tag == 'ol'
            items = elem.find_all('li', recursive=False)
            if ordered:
                # Leave room for the widest number so "10." never overlaps text.
                indent = max(20, text_width(f'{len(items)}.', body) + 6)
            else:
                indent = 16
            for number, item in enumerate(items, 1):
                marker = f'{number}.' if ordered else bullet
                emit('text', (left, cy), marker, fill='#0066cc', font=body)
                lines = wrap_text(item.get_text().strip(), body, content_w - indent)
                cy = emit_lines(lines, body, '#374151', cy, line_height, indent=indent)
            return cy + 8

        if tag == 'hr':
            emit('line', [(left, cy), (left + content_w, cy)], fill='#e0e0e0', width=1)
            return cy + 30

        if tag == 'table':
            for row in elem.find_all('tr'):
                cells = row.find_all(['td', 'th'])
                if not cells:
                    continue
                cell_w = content_w // len(cells)
                for col, cell in enumerate(cells):
                    cell_x = left + col * cell_w
                    # Collapse whitespace to one line, then clip to the cell.
                    label = clip_to_width(
                        ' '.join(cell.get_text().split()), fonts['small'], cell_w - 16
                    )
                    emit('rectangle', [cell_x, cy, cell_x + cell_w, cy + 32], outline='#ddd')
                    emit('text', (cell_x + 8, cy + 6), label, fill='#333', font=fonts['small'])
                cy += 36
            # Gap so the following element does not touch the grid.
            return cy + 20

        if tag == 'br':
            return cy + line_height

        # Unsupported element: takes no space.
        return cy

    cursor = banner_h + pad
    for elem in root.children:
        # Text nodes between elements have no tag name and are skipped.
        if getattr(elem, 'name', None):
            cursor = layout(elem, cursor)
    total_h = max(400, cursor + pad)

    # ---- paint ---------------------------------------------------------
    img = Image.new('RGB', (page_w, total_h), '#ffffff')
    draw = ImageDraw.Draw(img)
    # Blue gradient banner across the top of the page.
    for row in range(banner_h):
        ratio = row / banner_h
        shade = (
            int(42 + (61 - 42) * ratio),
            int(98 + (133 - 98) * ratio),
            int(239 + (255 - 239) * ratio),
        )
        draw.line([(0, row), (page_w, row)], fill=shade)
    for method, args, kwargs in ops:
        getattr(draw, method)(*args, **kwargs)

    Path(img_path).parent.mkdir(parents=True, exist_ok=True)
    img.save(img_path, 'PNG')
    print(f'Converted: Markdown -> {img_path}')
def main():
    """Command-line entry point: convert a Markdown file to a PNG image.

    Expects ``sys.argv[1]`` to be the input Markdown file and
    ``sys.argv[2]`` the output PNG path. On a usage error, a missing or
    non-file input, or an input that cannot be read as UTF-8, a message is
    printed to stderr and the process exits with status 1.
    """
    if len(sys.argv) < 3:
        print("Usage: python md_convert.py <input.md> <output.png>", file=sys.stderr)
        sys.exit(1)
    input_path, output_path = sys.argv[1], sys.argv[2]

    source = Path(input_path)
    # is_file() also rejects directories, which would otherwise crash in
    # read_text() with a traceback instead of a clean error message.
    if not source.is_file():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        sys.exit(1)
    try:
        md_text = source.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: Cannot read {input_path}: {exc}", file=sys.stderr)
        sys.exit(1)
    markdown_to_png(md_text, output_path)


if __name__ == "__main__":
    main()