refactor: 修改转化方法
This commit is contained in:
parent
01867ed9dc
commit
6fc0be476f
|
|
@ -1,84 +1,45 @@
|
||||||
---
|
---
|
||||||
name: markdown-converter
|
name: markdown-converter
|
||||||
description: A versatile Markdown conversion tool that supports converting Markdown to HTML, PNG images, and plain text formats.
|
description: A simple tool to convert Markdown to PNG images using a browser engine.
|
||||||
metadata: {"clawdbot":{"emoji":"📝","os":["linux","darwin","win32"]}}
|
metadata: {"clawdbot":{"emoji":"🖼️","os":["linux","darwin","win32"]}}
|
||||||
---
|
---
|
||||||
|
|
||||||
# Markdown Converter
|
# Markdown to PNG Converter
|
||||||
|
|
||||||
A versatile Markdown conversion tool that supports converting Markdown to HTML, PNG images, and plain text formats.
|
A simple tool to convert Markdown documents to PNG images using the Chromium browser engine.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **HTML Conversion**: High-quality Markdown to HTML conversion
|
- **Browser-based rendering**: High-quality output using a real browser engine
|
||||||
- **PNG Output**: Render Markdown as PNG images
|
- **Full CSS support**: Supports complex Markdown with tables, code blocks, etc.
|
||||||
- **CJK Support**: Uses system CJK fonts for Chinese character rendering
|
- **Code highlighting**: Syntax highlighting for code blocks
|
||||||
- **Code Highlighting**: Syntax highlighting for code blocks
|
- **CJK support**: Uses system fonts for Chinese character rendering
|
||||||
- **Clean Output**: Removes invisible Unicode characters
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install matplotlib html2text pygments markdown
|
pip install markdown2image playwright && playwright install chromium
|
||||||
```
|
```
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
- **matplotlib**: PNG image rendering with excellent CJK support
|
- **markdown2image**: Markdown to image conversion
|
||||||
- **html2text**: HTML and Markdown conversion
|
- **playwright**: Browser automation (Chromium)
|
||||||
- **Pygments**: Code syntax highlighting
|
|
||||||
- **markdown**: Python Markdown processor
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Convert to HTML
|
# Convert Markdown to PNG
|
||||||
python scripts/md_convert.py input.md output.html
|
|
||||||
|
|
||||||
# Convert to PNG
|
|
||||||
python scripts/md_convert.py input.md output.png
|
python scripts/md_convert.py input.md output.png
|
||||||
|
|
||||||
# Convert to plain text
|
|
||||||
python scripts/md_convert.py input.md output.txt
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Output Formats
|
|
||||||
|
|
||||||
### HTML
|
|
||||||
- Complete HTML document structure
|
|
||||||
- Inline CSS styling
|
|
||||||
- Code syntax highlighting
|
|
||||||
- Responsive design
|
|
||||||
|
|
||||||
### PNG Card
|
|
||||||
- White card background
|
|
||||||
- Large title font
|
|
||||||
- Automatic text wrapping
|
|
||||||
- CJK character support
|
|
||||||
|
|
||||||
### Plain Text
|
|
||||||
- Plain text output
|
|
||||||
- Preserves basic formatting
|
|
||||||
- Removes invisible characters
|
|
||||||
|
|
||||||
## Supported Platforms
|
## Supported Platforms
|
||||||
|
|
||||||
- **Windows**: Uses system CJK fonts
|
- **Windows**: Uses Chromium browser
|
||||||
- **macOS**: Uses PingFang and other system fonts
|
- **macOS**: Uses Chromium browser
|
||||||
- **Linux**: Uses NotoSansCJK and other fonts
|
- **Linux**: Uses Chromium browser
|
||||||
|
|
||||||
## Workflow
|
|
||||||
|
|
||||||
```
|
|
||||||
Markdown Input
|
|
||||||
↓
|
|
||||||
[html2text / markdown library]
|
|
||||||
↓
|
|
||||||
HTML / PNG / Plain Text
|
|
||||||
```
|
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- PNG rendering requires Chinese fonts to be installed on the system
|
- Requires Chromium browser (installed via `playwright install chromium`)
|
||||||
- Code highlighting requires Pygments support
|
- Chinese fonts are supported via system fonts
|
||||||
- Large files may require longer processing time
|
|
||||||
|
|
|
||||||
|
|
@ -1,581 +1,27 @@
|
||||||
"""Markdown 转换器 - 支持 HTML / PNG / Plain Text 输出
|
"""Markdown to PNG converter
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
- HTML/PNG 输出: pip install matplotlib html2text pygments markdown
|
- pip install markdown2image playwright && playwright install chromium
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import html
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from markdown2image import Markdown2Image
|
||||||
# ============================================================
|
|
||||||
# 字体查找
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def _find_font():
    """Return the path of the first candidate font file that exists.

    The candidate list is chosen from ``sys.platform``: Windows fonts under
    ``%WINDIR%\\Fonts``, macOS system fonts, or common Linux font locations.

    Returns:
        The first existing path as a string, or ``None`` when none of the
        candidates is present on this machine.
    """
    platform = sys.platform
    if platform == "win32":
        fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
        search_order = [
            os.path.join(fonts_dir, file_name)
            for file_name in ("msyh.ttc", "msyhbd.ttc", "simhei.ttf", "simsun.ttc")
        ]
    elif platform == "darwin":
        search_order = [
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Light.ttc",
            "/Library/Fonts/Arial Unicode.ttf",
        ]
    else:
        search_order = [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        ]

    # Candidates are ordered by preference; the first hit wins.
    return next((path for path in search_order if os.path.exists(path)), None)
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# 工具函数
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def _clean_invisible_chars(text):
    """Strip invisible Unicode characters and normalise exotic spaces.

    Args:
        text: Input string; ``None`` or an empty string is accepted.

    Returns:
        ``""`` for falsy input. Otherwise the text with zero-width and
        directional control characters, line/paragraph separators, the BOM
        and soft hyphens removed, and with no-break / ideographic spaces
        replaced by a plain ASCII space.
    """
    if not text:
        return ""
    # Zero-width chars, bidi marks/overrides, U+2028..U+202F, BOM, soft hyphen.
    text = re.sub(r'[\u200b-\u200f\u2028-\u202f\ufeff\u00ad]', '', text)
    # No-break space and ideographic (fullwidth) space become a normal space.
    text = re.sub(r'[\xa0\u3000]', ' ', text)
    # The Halfwidth/Fullwidth Forms block (U+FF00..U+FFEF) is mostly VISIBLE
    # text -- fullwidth punctuation such as "，" "！" "（" is everyday Chinese
    # typography -- so it must not be deleted wholesale. Only the halfwidth
    # Hangul filler in that block is actually invisible.
    text = text.replace('\uffa0', '')
    return text
|
|
||||||
|
|
||||||
|
|
||||||
def _decode_html_entities(text):
    """Decode HTML entities and normalise typographic quotes.

    The hand-written replacement table is replaced by ``html.unescape``,
    which covers the named and numeric entities the table was meant to
    handle (``&lt;``, ``&gt;``, ``&amp;``, ``&quot;``, ``&#39;``,
    ``&apos;``) as well as every other standard entity.

    Args:
        text: String that may contain HTML entities.

    Returns:
        The decoded string, with curly double/single quotes converted to
        their straight ASCII equivalents.
    """
    decoded = html.unescape(text)
    # NOTE(review): the original "special quotes" step appears to have mapped
    # curly quotes to straight ones -- confirm against the upstream file.
    straight_quotes = str.maketrans({
        '\u201c': '"',
        '\u201d': '"',
        '\u2018': "'",
        '\u2019': "'",
    })
    return decoded.translate(straight_quotes)
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# Markdown 解析
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def parse_markdown(md_text):
    """Parse Markdown text into a flat list of block-level elements.

    Args:
        md_text: Raw Markdown source; ``None`` or empty yields ``[]``.

    Returns:
        A list of tuples whose first item is the element kind:
        ``('header', level, text)``, ``('paragraph', text)``,
        ``('codeblock', code)``, ``('list_item', text)``,
        ``('ordered_item', text)``, ``('quote', text)``, ``('hr',)`` and
        ``('link', label)``.

    Fenced code is collected verbatim between its opening and closing fence
    and emitted as a single ``codeblock`` element carrying the code text.
    The language tag on the opening fence is not kept.
    """
    if not md_text:
        return []

    md_text = _clean_invisible_chars(md_text)
    elements = []
    current_paragraph = []
    code_lines = None  # list of raw lines while inside a fence, else None

    def flush_paragraph():
        # Consecutive plain lines are joined into one paragraph element.
        text = ' '.join(current_paragraph).strip()
        if text:
            elements.append(('paragraph', text))
        current_paragraph.clear()

    for line in md_text.split('\n'):
        stripped = line.strip()

        # Inside a fence nothing is interpreted as Markdown: keep the raw
        # line (indentation and blank lines included) until the fence closes.
        if code_lines is not None:
            if stripped.startswith('```'):
                elements.append(('codeblock', '\n'.join(code_lines)))
                code_lines = None
            else:
                code_lines.append(line)
            continue

        # A blank line ends the current paragraph.
        if not stripped:
            flush_paragraph()
            continue

        # ATX header: one to six '#' followed by whitespace and text.
        header_match = re.match(r'^(#{1,6})\s+(.+)$', stripped)
        if header_match:
            flush_paragraph()
            level = len(header_match.group(1))
            elements.append(('header', level, header_match.group(2).strip()))
            continue

        # Opening code fence.
        if stripped.startswith('```'):
            flush_paragraph()
            code_lines = []
            continue

        # Unordered list item.
        list_match = re.match(r'^[\-\*+]\s+(.+)$', stripped)
        if list_match:
            flush_paragraph()
            elements.append(('list_item', list_match.group(1).strip()))
            continue

        # Ordered list item (the number itself is not kept).
        ordered_match = re.match(r'^\d+\.\s+(.+)$', stripped)
        if ordered_match:
            flush_paragraph()
            elements.append(('ordered_item', ordered_match.group(1).strip()))
            continue

        # Block quote.
        if stripped.startswith('>'):
            flush_paragraph()
            elements.append(('quote', stripped[1:].strip()))
            continue

        # Horizontal rule.
        if re.match(r'^[\-\*_]{3,}$', stripped):
            flush_paragraph()
            elements.append(('hr',))
            continue

        # A line consisting solely of a link or image. fullmatch is used so
        # that a sentence which merely begins with a link stays a paragraph
        # instead of being truncated to the link label.
        link_match = re.fullmatch(r'!?\[([^\]]+)\]\([^\)]+\)', stripped)
        if link_match:
            flush_paragraph()
            elements.append(('link', link_match.group(1)))
            continue

        # Anything else is paragraph text.
        current_paragraph.append(stripped)

    # An unterminated fence still yields whatever code was collected.
    if code_lines is not None:
        elements.append(('codeblock', '\n'.join(code_lines)))
    flush_paragraph()
    return elements
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# 文本换行
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def _wrap_text(text, font, max_width, draw):
    """Split *text* into lines that fit within *max_width* pixels.

    Wrapping is done per character, so it also works for text without
    spaces. Widths are measured with ``draw.textbbox``.

    Args:
        text: Text to wrap; explicit ``\\n`` separators are honoured.
        font: Font object passed through to ``draw.textbbox``.
        max_width: Maximum rendered line width.
        draw: Object exposing ``textbbox((x, y), text, font=...)`` that
            returns a box whose items 0 and 2 are the left/right edges.

    Returns:
        List of wrapped lines; whitespace-only input lines become ``""``.
    """
    wrapped = []
    for segment in text.split("\n"):
        if not segment.strip():
            wrapped.append("")
            continue

        pending = ""
        for char in segment:
            candidate = pending + char
            box = draw.textbbox((0, 0), candidate, font=font)
            too_wide = box[2] - box[0] > max_width
            # Never break before the first character of a line, even when
            # that single character is already wider than the limit.
            if too_wide and pending:
                wrapped.append(pending)
                pending = char
            else:
                pending = candidate

        if pending:
            wrapped.append(pending)
    return wrapped
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# HTML 转换
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def markdown_to_html(md_text):
    """Convert Markdown text into a complete HTML document.

    The third-party ``markdown`` package is used with the ``codehilite``,
    ``fenced_code`` and ``tables`` extensions. If it cannot be imported, the
    dependency-free ``_simple_markdown_to_html`` converter is used instead.

    Args:
        md_text: Raw Markdown source; ``None`` or empty yields ``""``.

    Returns:
        The full HTML page produced by ``_html_template``.
    """
    if not md_text:
        return ""

    md_text = _clean_invisible_chars(md_text)

    try:
        import markdown

        converter = markdown.Markdown(
            extensions=['codehilite', 'fenced_code', 'tables'],
        )
        html_content = converter.convert(md_text)
        return _html_template(html_content)
    except ImportError:
        # Degraded mode: the package is not installed, so fall back to the
        # built-in line-based converter.
        return _simple_markdown_to_html(md_text)
|
|
||||||
|
|
||||||
|
|
||||||
def _simple_markdown_to_html(md_text):
    """Convert Markdown to HTML using only the standard library.

    This is a line-based fallback that understands fenced code blocks,
    headers, horizontal rules, block quotes, unordered list items and
    bold / italic / inline-code spans inside paragraphs.

    Args:
        md_text: Markdown source text.

    Returns:
        A full HTML page produced by ``_html_template``.
    """
    html_lines = []
    in_codeblock = False

    for line in md_text.split('\n'):
        stripped = line.strip()

        # A fence line toggles code-block mode.
        if stripped.startswith('```'):
            if in_codeblock:
                html_lines.append('</code></pre>')
            else:
                lang = stripped[3:].strip()
                # Expose the fence language as the conventional
                # "language-xxx" class; it comes from the document, so it is
                # escaped before being placed in an attribute.
                lang_attr = (
                    f' class="language-{html.escape(lang, quote=True)}"'
                    if lang else ''
                )
                html_lines.append(f'<pre><code{lang_attr}>')
            in_codeblock = not in_codeblock
            continue

        # Code is emitted verbatim (unstripped) and HTML-escaped.
        if in_codeblock:
            html_lines.append(html.escape(line))
            continue

        # Header
        header_match = re.match(r'^(#{1,6})\s+(.+)$', stripped)
        if header_match:
            level = len(header_match.group(1))
            content = header_match.group(2)
            html_lines.append(f'<h{level}>{_decode_html_entities(content)}</h{level}>')
            continue

        # Horizontal rule
        if re.match(r'^[\-\*_]{3,}$', stripped):
            html_lines.append('<hr>')
            continue

        # Block quote
        if stripped.startswith('>'):
            content = stripped[1:].strip()
            html_lines.append(f'<blockquote>{_decode_html_entities(content)}</blockquote>')
            continue

        # Unordered list item
        list_match = re.match(r'^[\-\*+]\s+(.+)$', stripped)
        if list_match:
            content = list_match.group(1)
            html_lines.append(f'<li>{_decode_html_entities(content)}</li>')
            continue

        # Paragraph: apply inline bold / italic / code markup. Double
        # markers are handled before single ones so "**x**" is not consumed
        # by the "*x*" rule.
        if stripped:
            text = _decode_html_entities(stripped)
            text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
            text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)
            text = re.sub(r'__(.+?)__', r'<strong>\1</strong>', text)
            text = re.sub(r'_(.+?)_', r'<em>\1</em>', text)
            text = re.sub(r'`(.+?)`', r'<code>\1</code>', text)
            html_lines.append(f'<p>{text}</p>')

    # Close a fence that was never terminated so the markup stays balanced.
    if in_codeblock:
        html_lines.append('</code></pre>')

    return _html_template('\n'.join(html_lines))
|
|
||||||
|
|
||||||
|
|
||||||
def _html_template(content):
    """Wrap an HTML fragment in a complete, styled HTML document.

    Args:
        content: HTML markup to place inside ``<body>``.

    Returns:
        The full page as a string: doctype, ``<head>`` with inline CSS, and
        *content* as the body.
    """
    # Plain (non-f) strings, so the CSS braces need no doubling.
    page_head = """<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Markdown Document</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #333;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 1.5em;
margin-bottom: 0.5em;
font-weight: 600;
}
h1 { font-size: 2em; border-bottom: 2px solid #333; }
h2 { font-size: 1.5em; border-bottom: 1px solid #ddd; }
code {
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Consolas', 'Monaco', monospace;
}
pre {
background: #f4f4f4;
padding: 16px;
border-radius: 6px;
overflow-x: auto;
}
pre code {
background: none;
padding: 0;
}
blockquote {
border-left: 4px solid #ddd;
margin: 0;
padding-left: 16px;
color: #666;
}
a {
color: #0066cc;
}
hr {
border: none;
border-top: 1px solid #ddd;
margin: 24px 0;
}
</style>
</head>
<body>
"""
    page_tail = """
</body>
</html>"""
    return f"{page_head}{content}{page_tail}"
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# PNG 渲染 (使用 matplotlib)
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def _get_matplotlib_font():
    """Locate a CJK font file and return it as matplotlib ``FontProperties``.

    The Linux locations are always probed first; on Windows or macOS the
    platform's own font paths are appended to the search list.

    Returns:
        ``FontProperties`` built from the first font file that exists (the
        file is also registered with matplotlib's font manager), or ``None``
        when nothing is found so that matplotlib uses its default font.
    """
    import matplotlib.font_manager as fm

    search_paths = [
        '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc',
        '/usr/share/fonts/opentype/noto/NotoSansSC-Regular.otf',
        '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc',
        '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',
        '/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf',
        '/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc',
        '/usr/share/fonts/truetype/arphic/uming.ttc',
        '/usr/share/fonts/truetype/arphic/ukai.ttc',
    ]

    if sys.platform == "win32":
        fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
        search_paths += [
            os.path.join(fonts_dir, file_name)
            for file_name in (
                "msyh.ttc",
                "msyhbd.ttc",
                "simhei.ttf",
                "simsun.ttc",
                "STHeiti Light.ttc",
            )
        ]
    elif sys.platform == "darwin":
        search_paths += [
            '/System/Library/Fonts/PingFang.ttc',
            '/System/Library/Fonts/STHeiti Light.ttc',
            '/Library/Fonts/Arial Unicode.ttf',
            '/System/Library/Fonts/Supplemental/Arial Unicode.ttf',
        ]

    for candidate in search_paths:
        if not os.path.exists(candidate):
            continue
        # Register the file with matplotlib, then refer to it by path.
        fm.fontManager.addfont(candidate)
        return fm.FontProperties(fname=candidate)

    # Nothing found: the caller falls back to matplotlib's default font.
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def markdown_to_png(md_text, img_path):
|
def markdown_to_png(md_text, img_path):
|
||||||
"""将 Markdown 渲染为 PNG 图片(使用 matplotlib)"""
|
"""将 Markdown 渲染为 PNG 图片"""
|
||||||
try:
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib.patches as patches
|
|
||||||
import matplotlib.font_manager as fm
|
|
||||||
import matplotlib
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError("matplotlib not installed. Run: pip install matplotlib")
|
|
||||||
|
|
||||||
# 设置非交互式后端
|
|
||||||
matplotlib.use('Agg')
|
|
||||||
|
|
||||||
# 获取中文字体
|
|
||||||
font = _get_matplotlib_font()
|
|
||||||
|
|
||||||
elements = parse_markdown(md_text)
|
|
||||||
|
|
||||||
W, PAD = 10, 0.5 # 英寸, 边距
|
|
||||||
FIG_H = 2.0 # 初始高度
|
|
||||||
LINE_H = 0.35 # 每行高度
|
|
||||||
CODE_H = 0.5 # 代码块初始高度
|
|
||||||
|
|
||||||
# 计算所需高度
|
|
||||||
y = 2.5 # 顶部空间
|
|
||||||
for elem in elements:
|
|
||||||
if elem[0] == 'header':
|
|
||||||
level = elem[1]
|
|
||||||
text = elem[2]
|
|
||||||
chars_per_line = 50 if level <= 2 else 60
|
|
||||||
lines = max(1, len(text) // chars_per_line + 1)
|
|
||||||
y += lines * (0.5 if level <= 2 else 0.4) + 0.2
|
|
||||||
elif elem[0] == 'paragraph':
|
|
||||||
chars_per_line = 60
|
|
||||||
lines = max(1, len(elem[1]) // chars_per_line + 1)
|
|
||||||
y += lines * 0.35 + 0.3
|
|
||||||
elif elem[0] == 'codeblock':
|
|
||||||
lines = elem[1].count('\n') + 2
|
|
||||||
y += lines * 0.3 + 0.2
|
|
||||||
elif elem[0] in ('list_item', 'quote'):
|
|
||||||
chars_per_line = 55
|
|
||||||
lines = max(1, len(elem[1]) // chars_per_line + 1)
|
|
||||||
y += lines * 0.32 + 0.15
|
|
||||||
|
|
||||||
FIG_H = max(8, y)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(W, FIG_H))
|
|
||||||
fig.patch.set_facecolor('#ffffff')
|
|
||||||
ax.set_facecolor('#ffffff')
|
|
||||||
|
|
||||||
# 标题栏背景
|
|
||||||
header = patches.Rectangle((0, FIG_H - 1.2), W, 1.0, linewidth=0, facecolor='#2d3748')
|
|
||||||
ax.add_patch(header)
|
|
||||||
|
|
||||||
# 标题
|
|
||||||
ax.text(0.5, FIG_H - 0.6, 'Markdown Document',
|
|
||||||
fontsize=18, fontweight='bold', color='white',
|
|
||||||
fontproperties=font, ha='left', va='center')
|
|
||||||
|
|
||||||
ax.text(0.5, FIG_H - 1.0, 'Converted from Markdown',
|
|
||||||
fontsize=10, color='#888888', fontproperties=font, ha='left', va='center')
|
|
||||||
|
|
||||||
ax.set_xlim(0, W)
|
|
||||||
ax.set_ylim(0, FIG_H)
|
|
||||||
ax.axis('off')
|
|
||||||
|
|
||||||
cy = FIG_H - 1.5
|
|
||||||
|
|
||||||
for elem in elements:
|
|
||||||
if elem[0] == 'header':
|
|
||||||
level = elem[1]
|
|
||||||
text = elem[2]
|
|
||||||
size = 16 if level <= 2 else 14
|
|
||||||
weight = 'bold' if level == 1 else 'normal'
|
|
||||||
color = '#1a1a2e' if level == 1 else '#2d3748'
|
|
||||||
ax.text(PAD, cy, text, fontsize=size, fontweight=weight,
|
|
||||||
color=color, fontproperties=font, ha='left', va='top')
|
|
||||||
cy -= size * 0.04 + 0.15
|
|
||||||
|
|
||||||
elif elem[0] == 'paragraph':
|
|
||||||
ax.text(PAD, cy, elem[1], fontsize=11, color='#374151',
|
|
||||||
fontproperties=font, ha='left', va='top', wrap=True)
|
|
||||||
lines = max(1, len(elem[1]) // 60 + 1)
|
|
||||||
cy -= lines * 0.35 + 0.25
|
|
||||||
|
|
||||||
elif elem[0] == 'codeblock':
|
|
||||||
code_h = max(0.5, (elem[1].count('\n') + 2) * 0.3)
|
|
||||||
code_box = patches.Rectangle((PAD, cy - code_h), W - PAD * 2, code_h,
|
|
||||||
linewidth=1, edgecolor='#e0e0e0', facecolor='#f4f4f4')
|
|
||||||
ax.add_patch(code_box)
|
|
||||||
ax.text(PAD + 0.1, cy - 0.15, elem[1][:500], fontsize=9,
|
|
||||||
color='#333333', fontfamily='monospace', va='top')
|
|
||||||
cy -= code_h + 0.2
|
|
||||||
|
|
||||||
elif elem[0] == 'list_item':
|
|
||||||
ax.text(PAD, cy, f'\u2022 {elem[1]}', fontsize=11, color='#374151',
|
|
||||||
fontproperties=font, ha='left', va='top')
|
|
||||||
cy -= 0.35
|
|
||||||
|
|
||||||
elif elem[0] == 'quote':
|
|
||||||
ax.plot([PAD, PAD, PAD + 0.05, PAD + 0.05],
|
|
||||||
[cy, cy - 0.4, cy - 0.4, cy - 0.6],
|
|
||||||
color='#0066cc', linewidth=2)
|
|
||||||
ax.text(PAD + 0.15, cy - 0.1, elem[1], fontsize=11, color='#666666',
|
|
||||||
fontproperties=font, ha='left', va='top')
|
|
||||||
cy -= 0.5
|
|
||||||
|
|
||||||
elif elem[0] == 'hr':
|
|
||||||
ax.axhline(y=cy, color='#e0e0e0', linewidth=1, xmin=0.05, xmax=0.95)
|
|
||||||
cy -= 0.3
|
|
||||||
|
|
||||||
plt.tight_layout(pad=0)
|
|
||||||
|
|
||||||
Path(img_path).parent.mkdir(parents=True, exist_ok=True)
|
Path(img_path).parent.mkdir(parents=True, exist_ok=True)
|
||||||
plt.savefig(img_path, format='png', dpi=150, bbox_inches='tight',
|
|
||||||
facecolor='#ffffff', edgecolor='none')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
|
m2i = Markdown2Image()
|
||||||
|
m2i.b64_decode_and_dump(markdown_text=md_text, output_path=img_path)
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# 纯文本转换
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def markdown_to_text(md_text):
    """Render Markdown as plain text.

    Args:
        md_text: Raw Markdown source; ``None`` or empty yields ``""``.

    Returns:
        The text form of the elements returned by ``parse_markdown``.
        Headers, paragraphs, code blocks and rules are followed by a blank
        line; list items and quotes are not. Element kinds without a rule
        here (such as ``link``) produce no output.
    """
    if not md_text:
        return ""

    cleaned = _clean_invisible_chars(md_text)
    output = []

    for kind, *payload in parse_markdown(cleaned):
        if kind == 'header':
            level, title = payload
            output += [f"{'#' * level} {title}", ""]
        elif kind == 'paragraph':
            output += [payload[0], ""]
        elif kind == 'codeblock':
            output += ["```", payload[0], "```", ""]
        elif kind == 'list_item':
            output.append(f"• {payload[0]}")
        elif kind == 'ordered_item':
            output.append(f" {payload[0]}")
        elif kind == 'quote':
            output.append(f"> {payload[0]}")
        elif kind == 'hr':
            output += ["─" * 50, ""]

    return '\n'.join(output)
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# 主函数
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 3:
|
if len(sys.argv) < 3:
|
||||||
print("Usage: python md_convert.py <input.md> <output.{html|png|txt}>", file=sys.stderr)
|
print("Usage: python md_convert.py <input.md> <output.png>", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
input_path = sys.argv[1]
|
input_path = sys.argv[1]
|
||||||
|
|
@ -586,22 +32,7 @@ def main():
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
md_text = Path(input_path).read_text(encoding="utf-8")
|
md_text = Path(input_path).read_text(encoding="utf-8")
|
||||||
ext = Path(output_path).suffix.lower()
|
markdown_to_png(md_text, output_path)
|
||||||
|
|
||||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
if ext == ".html":
|
|
||||||
html_content = markdown_to_html(md_text)
|
|
||||||
Path(output_path).write_text(html_content, encoding="utf-8")
|
|
||||||
elif ext == ".png":
|
|
||||||
markdown_to_png(md_text, output_path)
|
|
||||||
elif ext == ".txt":
|
|
||||||
text_content = markdown_to_text(md_text)
|
|
||||||
Path(output_path).write_text(text_content, encoding="utf-8")
|
|
||||||
else:
|
|
||||||
print(f"Error: Unsupported output format: {ext}", file=sys.stderr)
|
|
||||||
print("Supported formats: .html, .png, .txt", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
print(f"Converted: {input_path} -> {output_path}")
|
print(f"Converted: {input_path} -> {output_path}")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue