cmsmanage/tasks/clean_markdown.py

28 lines
610 B
Python
Raw Normal View History

import nh3
from markdownx.utils import markdownify
# Allowlist of HTML elements kept when rendered Markdown is sanitized;
# passed as ``tags`` to ``nh3.clean`` in ``markdown_to_clean_html``.
# fmt: off
MARKDOWN_TAGS = [
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Inline text formatting
    "b", "i", "strong", "em", "tt", "sub", "sup",
    # Generic block / inline containers
    "p", "br", "span", "div",
    # Quotations and code
    "blockquote", "code", "pre",
    # Horizontal rule
    "hr",
    # Ordered / unordered lists
    "ul", "ol", "li",
    # Definition lists
    "dl", "dd", "dt",
    # Images
    "img",
    # Links
    "a",
    # Tables
    "table", "thead", "tbody", "tr", "th", "td",
]
# fmt: on

# Allowlist of attributes per element; passed as ``attributes`` to
# ``nh3.clean``. The "*" entry is intended to apply to every allowed element.
MARKDOWN_ATTRS = {
    "*": ["id"],
    "img": ["src", "alt", "title"],
    "a": ["href", "alt", "title"],
}
def markdown_to_clean_html(md: str) -> str:
    """Render Markdown to HTML and sanitize the result.

    The text is converted with markdownx's ``markdownify`` and the resulting
    HTML is passed through ``nh3.clean``, restricted to the elements in
    ``MARKDOWN_TAGS`` and the attributes in ``MARKDOWN_ATTRS``.

    Args:
        md: Markdown source text.

    Returns:
        The sanitized HTML string.
    """
    # nh3 documents ``tags`` as a set of names and ``attributes`` as a mapping
    # of tag name to a set of attribute names. Convert here so the
    # module-level constants may be any iterable (they are lists today).
    allowed_tags = set(MARKDOWN_TAGS)
    allowed_attrs = {tag: set(attrs) for tag, attrs in MARKDOWN_ATTRS.items()}
    return nh3.clean(markdownify(md), tags=allowed_tags, attributes=allowed_attrs)