You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
340 lines
12 KiB
340 lines
12 KiB
From 6e5ba41803cc8c3192f001b3ede9b74454220bda Mon Sep 17 00:00:00 2001
|
|
From: Tiago de Paula <tiagodepalves@gmail.com>
|
|
Date: Mon, 9 May 2022 09:39:31 -0300
|
|
Subject: [PATCH] Update to Mistune 2.0.2 (#1764)
|
|
|
|
Co-authored-by: Steven Silvester <steven.silvester@ieee.org>
|
|
---
|
|
nbconvert/filters/markdown_mistune.py | 212 ++++++++++++++------------
|
|
setup.py | 2 +-
|
|
2 files changed, 119 insertions(+), 95 deletions(-)
|
|
|
|
diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
|
|
index 382a5388..636e1e8c 100644
|
|
--- a/nbconvert/filters/markdown_mistune.py
|
|
+++ b/nbconvert/filters/markdown_mistune.py
|
|
@@ -21,7 +21,7 @@ except ImportError:
|
|
from cgi import escape as html_escape
|
|
|
|
import bs4
|
|
-import mistune
|
|
+from mistune import BlockParser, HTMLRenderer, InlineParser, Markdown
|
|
from pygments import highlight
|
|
from pygments.formatters import HtmlFormatter
|
|
from pygments.lexers import get_lexer_by_name
|
|
@@ -34,158 +34,183 @@ class InvalidNotebook(Exception):
|
|
pass
|
|
|
|
|
|
-class MathBlockGrammar(mistune.BlockGrammar):
|
|
- """This defines a single regex comprised of the different patterns that
|
|
- identify math content spanning multiple lines. These are used by the
|
|
- MathBlockLexer.
|
|
+class MathBlockParser(BlockParser):
|
|
+ """This acts as a pass-through to the MathInlineParser. It is needed in
|
|
+ order to avoid other block level rules splitting math sections apart.
|
|
"""
|
|
|
|
- multi_math_str = "|".join(
|
|
- [r"^\$\$.*?\$\$", r"^\\\\\[.*?\\\\\]", r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}"]
|
|
+ MULTILINE_MATH = re.compile(
|
|
+ r"(?<!\\)[$]{2}.*?(?<!\\)[$]{2}|"
|
|
+ r"\\\\\[.*?\\\\\]|"
|
|
+ r"\\begin\{([a-z]*\*?)\}.*?\\end\{\1\}",
|
|
+ re.DOTALL,
|
|
)
|
|
- multiline_math = re.compile(multi_math_str, re.DOTALL)
|
|
|
|
+ RULE_NAMES = ("multiline_math",) + BlockParser.RULE_NAMES
|
|
|
|
-class MathBlockLexer(mistune.BlockLexer):
|
|
- """This acts as a pass-through to the MathInlineLexer. It is needed in
|
|
- order to avoid other block level rules splitting math sections apart.
|
|
- """
|
|
+ # Regex for header that doesn't require space after '#'
|
|
+ AXT_HEADING = re.compile(r" {0,3}(#{1,6})(?!#+)\s*([^\n]*?)$")
|
|
|
|
- default_rules = ["multiline_math"] + mistune.BlockLexer.default_rules
|
|
+ def parse_multiline_math(self, m, state):
|
|
+ """Pass token through mutiline math."""
|
|
+ return {"type": "multiline_math", "text": m.group(0)}
|
|
|
|
- def __init__(self, rules=None, **kwargs):
|
|
- if rules is None:
|
|
- rules = MathBlockGrammar()
|
|
- super().__init__(rules, **kwargs)
|
|
|
|
- def parse_multiline_math(self, m):
|
|
- """Add token to pass through mutiline math."""
|
|
- self.tokens.append({"type": "multiline_math", "text": m.group(0)})
|
|
+def _dotall(pattern):
|
|
+ """Make the '.' special character match any character inside the pattern, including a newline.
|
|
|
|
-
|
|
-class MathInlineGrammar(mistune.InlineGrammar):
|
|
- """This defines different ways of declaring math objects that should be
|
|
- passed through to mathjax unaffected. These are used by the MathInlineLexer.
|
|
+ This is implemented with the inline flag `(?s:...)` and is equivalent to using `re.DOTALL` when
|
|
+ it is the only pattern used. It is necessary since `mistune>=2.0.0`, where the pattern is passed
|
|
+ to the undocumented `re.Scanner`.
|
|
"""
|
|
-
|
|
- inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)
|
|
- block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
|
|
- latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", re.DOTALL)
|
|
- text = re.compile(r"^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)")
|
|
+ return f"(?s:{pattern})"
|
|
|
|
|
|
-class MathInlineLexer(mistune.InlineLexer):
|
|
- r"""This interprets the content of LaTeX style math objects using the rules
|
|
- defined by the MathInlineGrammar.
|
|
+class MathInlineParser(InlineParser):
|
|
+ r"""This interprets the content of LaTeX style math objects.
|
|
|
|
In particular this grabs ``$$...$$``, ``\\[...\\]``, ``\\(...\\)``, ``$...$``,
|
|
and ``\begin{foo}...\end{foo}`` styles for declaring mathematics. It strips
|
|
delimiters from all these varieties, and extracts the type of environment
|
|
in the last case (``foo`` in this example).
|
|
"""
|
|
- default_rules = [
|
|
- "block_math",
|
|
- "inline_math",
|
|
+ BLOCK_MATH_TEX = _dotall(r"(?<!\\)\$\$(.*?)(?<!\\)\$\$")
|
|
+ BLOCK_MATH_LATEX = _dotall(r"(?<!\\)\\\\\[(.*?)(?<!\\)\\\\\]")
|
|
+ INLINE_MATH_TEX = _dotall(r"(?<![$\\])\$(.+?)(?<![$\\])\$")
|
|
+ INLINE_MATH_LATEX = _dotall(r"(?<!\\)\\\\\((.*?)(?<!\\)\\\\\)")
|
|
+ LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")
|
|
+
|
|
+ # The order is important here
|
|
+ RULE_NAMES = (
|
|
+ "block_math_tex",
|
|
+ "block_math_latex",
|
|
+ "inline_math_tex",
|
|
+ "inline_math_latex",
|
|
"latex_environment",
|
|
- ] + mistune.InlineLexer.default_rules
|
|
-
|
|
- def __init__(self, renderer, rules=None, **kwargs):
|
|
- if rules is None:
|
|
- rules = MathInlineGrammar()
|
|
- super().__init__(renderer, rules, **kwargs)
|
|
-
|
|
- def output_inline_math(self, m):
|
|
- return self.renderer.inline_math(m.group(1) or m.group(2))
|
|
-
|
|
- def output_block_math(self, m):
|
|
- return self.renderer.block_math(m.group(1) or m.group(2) or "")
|
|
-
|
|
- def output_latex_environment(self, m):
|
|
- return self.renderer.latex_environment(m.group(1), m.group(2))
|
|
-
|
|
-
|
|
-class MarkdownWithMath(mistune.Markdown):
|
|
- def __init__(self, renderer, **kwargs):
|
|
- if "inline" not in kwargs:
|
|
- kwargs["inline"] = MathInlineLexer
|
|
- if "block" not in kwargs:
|
|
- kwargs["block"] = MathBlockLexer
|
|
- super().__init__(renderer, **kwargs)
|
|
-
|
|
- def output_multiline_math(self):
|
|
- return self.inline(self.token["text"])
|
|
-
|
|
-
|
|
-class IPythonRenderer(mistune.Renderer):
|
|
- def block_code(self, code, lang):
|
|
- if lang:
|
|
+ ) + InlineParser.RULE_NAMES
|
|
+
|
|
+ def parse_block_math_tex(self, m, state):
|
|
+ # sometimes the Scanner keeps the final '$$', so we use the
|
|
+ # full matched string and remove the math markers
|
|
+ text = m.group(0)[2:-2]
|
|
+ return "block_math", text
|
|
+
|
|
+ def parse_block_math_latex(self, m, state):
|
|
+ text = m.group(1)
|
|
+ return "block_math", text
|
|
+
|
|
+ def parse_inline_math_tex(self, m, state):
|
|
+ text = m.group(1)
|
|
+ return "inline_math", text
|
|
+
|
|
+ def parse_inline_math_latex(self, m, state):
|
|
+ text = m.group(1)
|
|
+ return "inline_math", text
|
|
+
|
|
+ def parse_latex_environment(self, m, state):
|
|
+ name, text = m.group(1), m.group(2)
|
|
+ return "latex_environment", name, text
|
|
+
|
|
+
|
|
+class MarkdownWithMath(Markdown):
|
|
+ def __init__(self, renderer, block=None, inline=None, plugins=None):
|
|
+ if block is None:
|
|
+ block = MathBlockParser()
|
|
+ if inline is None:
|
|
+ inline = MathInlineParser(renderer, hard_wrap=False)
|
|
+ super().__init__(renderer, block, inline, plugins)
|
|
+
|
|
+ def render(self, s):
|
|
+ """Compatibility method with `mistune==0.8.4`."""
|
|
+ return self.parse(s)
|
|
+
|
|
+
|
|
+class IPythonRenderer(HTMLRenderer):
|
|
+ def __init__(
|
|
+ self,
|
|
+ escape=True,
|
|
+ allow_harmful_protocols=True,
|
|
+ embed_images=False,
|
|
+ exclude_anchor_links=False,
|
|
+ anchor_link_text="¶",
|
|
+ path="",
|
|
+ attachments=None,
|
|
+ ):
|
|
+ super().__init__(escape, allow_harmful_protocols)
|
|
+ self.embed_images = embed_images
|
|
+ self.exclude_anchor_links = exclude_anchor_links
|
|
+ self.anchor_link_text = anchor_link_text
|
|
+ self.path = path
|
|
+ if attachments is not None:
|
|
+ self.attachments = attachments
|
|
+ else:
|
|
+ self.attachments = {}
|
|
+
|
|
+ def block_code(self, code, info=None):
|
|
+ if info:
|
|
try:
|
|
+ lang = info.strip().split(None, 1)[0]
|
|
lexer = get_lexer_by_name(lang, stripall=True)
|
|
except ClassNotFound:
|
|
code = lang + "\n" + code
|
|
lang = None
|
|
|
|
if not lang:
|
|
- return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code)
|
|
+ return super().block_code(code)
|
|
|
|
formatter = HtmlFormatter()
|
|
return highlight(code, lexer, formatter)
|
|
|
|
def block_html(self, html):
|
|
- embed_images = self.options.get("embed_images", False)
|
|
-
|
|
- if embed_images:
|
|
+ if self.embed_images:
|
|
html = self._html_embed_images(html)
|
|
|
|
return super().block_html(html)
|
|
|
|
def inline_html(self, html):
|
|
- embed_images = self.options.get("embed_images", False)
|
|
-
|
|
- if embed_images:
|
|
+ if self.embed_images:
|
|
html = self._html_embed_images(html)
|
|
|
|
return super().inline_html(html)
|
|
|
|
- def header(self, text, level, raw=None):
|
|
- html = super().header(text, level, raw=raw)
|
|
- if self.options.get("exclude_anchor_links"):
|
|
+ def heading(self, text, level):
|
|
+ html = super().heading(text, level)
|
|
+ if self.exclude_anchor_links:
|
|
return html
|
|
- anchor_link_text = self.options.get("anchor_link_text", "¶")
|
|
- return add_anchor(html, anchor_link_text=anchor_link_text)
|
|
+ return add_anchor(html, anchor_link_text=self.anchor_link_text)
|
|
|
|
def escape_html(self, text):
|
|
return html_escape(text)
|
|
|
|
+ def multiline_math(self, text):
|
|
+ return text
|
|
+
|
|
def block_math(self, text):
|
|
- return "$$%s$$" % self.escape_html(text)
|
|
+ return f"$${self.escape_html(text)}$$"
|
|
|
|
def latex_environment(self, name, text):
|
|
- name = self.escape_html(name)
|
|
- text = self.escape_html(text)
|
|
- return rf"\begin{{{name}}}{text}\end{{{name}}}"
|
|
+ name, text = self.escape_html(name), self.escape_html(text)
|
|
+ return f"\\begin{{{name}}}{text}\\end{{{name}}}"
|
|
|
|
def inline_math(self, text):
|
|
- return "$%s$" % self.escape_html(text)
|
|
+ return f"${self.escape_html(text)}$"
|
|
|
|
- def image(self, src, title, text):
|
|
+ def image(self, src, text, title):
|
|
"""Rendering a image with title and text.
|
|
|
|
:param src: source link of the image.
|
|
- :param title: title text of the image.
|
|
:param text: alt text of the image.
|
|
+ :param title: title text of the image.
|
|
"""
|
|
- attachments = self.options.get("attachments", {})
|
|
attachment_prefix = "attachment:"
|
|
- embed_images = self.options.get("embed_images", False)
|
|
|
|
if src.startswith(attachment_prefix):
|
|
name = src[len(attachment_prefix) :]
|
|
|
|
- if name not in attachments:
|
|
+ if name not in self.attachments:
|
|
raise InvalidNotebook(f"missing attachment: {name}")
|
|
|
|
- attachment = attachments[name]
|
|
+ attachment = self.attachments[name]
|
|
# we choose vector over raster, and lossless over lossy
|
|
preferred_mime_types = ["image/svg+xml", "image/png", "image/jpeg"]
|
|
for preferred_mime_type in preferred_mime_types:
|
|
@@ -197,13 +222,13 @@ class IPythonRenderer(mistune.Renderer):
|
|
data = attachment[mime_type]
|
|
src = "data:" + mime_type + ";base64," + data
|
|
|
|
- elif embed_images:
|
|
+ elif self.embed_images:
|
|
base64_url = self._src_to_base64(src)
|
|
|
|
if base64_url is not None:
|
|
src = base64_url
|
|
|
|
- return super().image(src, title, text)
|
|
+ return super().image(src, text, title)
|
|
|
|
def _src_to_base64(self, src):
|
|
"""Turn the source file into a base64 url.
|
|
@@ -211,8 +236,7 @@ class IPythonRenderer(mistune.Renderer):
|
|
:param src: source link of the file.
|
|
:return: the base64 url or None if the file was not found.
|
|
"""
|
|
- path = self.options.get("path", "")
|
|
- src_path = os.path.join(path, src)
|
|
+ src_path = os.path.join(self.path, src)
|
|
|
|
if not os.path.exists(src_path):
|
|
return None
|
|
diff --git a/setup.py b/setup.py
|
|
index 7220a875..2dfa2534 100644
|
|
--- a/setup.py
|
|
+++ b/setup.py
|
|
@@ -245,7 +245,7 @@ setup_args["install_requires"] = [
|
|
"jupyter_core>=4.7",
|
|
"jupyterlab_pygments",
|
|
"MarkupSafe>=2.0",
|
|
- "mistune>=0.8.1,<2",
|
|
+ "mistune>=2.0.2",
|
|
"nbclient>=0.5.0",
|
|
"nbformat>=5.1",
|
|
"packaging",
|
|
--
|
|
2.35.1
|
|
|