Adjust regex tests for CommonMark, improve inline regexp

github/fork/yochananmarqos/patch-1
Gonçalo Silva 2019-07-17 23:50:09 +01:00
parent 23cddba0d0
commit 7c3d4d9364
2 changed files with 31 additions and 29 deletions

View File

@ -22,10 +22,14 @@ from uberwriter import markup_regex
class TestRegex(unittest.TestCase):
"""Test cases from Windows CommunityToolkit
https://github.com/windows-toolkit/WindowsCommunityToolkit/blob/master/UnitTests/Markdown/Parse/
"""Test cases based on CommonMark's specs and demo:
- https://spec.commonmark.org/
- https://spec.commonmark.org/dingus/
TODO: use decorators. This needs decorators everywhere
CommonMark is the first-class Markdown variant. Supporting other variants is welcome and
encouraged, but whenever they conflict or the behavior is undecided, CommonMark takes precedence.
TODO: Use decorators. This needs decorators everywhere.
"""
def test_bold(self):
@ -36,39 +40,37 @@ class TestRegex(unittest.TestCase):
"This is __bold__ text": "bold",
"before**middle**end": "middle",
"before** middle **end": " middle ",
"before******after": "**"
"empty * * bold": None
}
for test, result in test_texts.items():
with self.subTest(name=test):
match = re.search(markup_regex.BOLD, test)
if not match:
self.assertFalse(result)
self.assertFalse(result, msg=test)
else:
self.assertEqual(match.group("text"), result)
self.assertEqual(match.group("text"), result, msg=test)
def test_header(self):
test_texts = {
"#Header 1": "Header 1",
"##Header 2": "Header 2",
"###Header 3": "Header 3",
"####Header 4": "Header 4",
"#####Header 5": "Header 5",
"######Header 6": "Header 6",
"#######Header 6": "#Header 6",
"#": "",
"## # # ##": "# #",
"#######": "",
"before\n#Header\nafter": "Header"
"# Header 1": "Header 1",
"## Header 2": "Header 2",
"### Header 3": "Header 3",
"#### Header 4": "Header 4",
"##### Header 5": "Header 5",
"###### Header 6": "Header 6",
"#": None,
"#######": None,
"before\n# Header\nafter": "Header"
}
for test, result in test_texts.items():
with self.subTest(name=test):
match = re.search(markup_regex.HEADER, test)
if not match:
self.assertFalse(result)
self.assertFalse(result, msg=test)
else:
self.assertEqual(match.group("text"), result)
self.assertEqual(match.group("text"), result, msg=test)
def test_header_under(self):
test_texts = {
@ -76,16 +78,16 @@ class TestRegex(unittest.TestCase):
"Header 1##\n=": "Header 1##",
"Header 2\n-- \n": "Header 2",
"Header 1\n=f": None,
"Header 1\n =": None
"Header 1\n =": "Header 1"
}
for test, result in test_texts.items():
with self.subTest(name=test):
match = re.search(markup_regex.HEADER_UNDER, test)
if not match:
self.assertFalse(result)
self.assertFalse(result, msg=test)
else:
self.assertEqual(match.group("text"), result)
self.assertEqual(match.group("text"), result, msg=test)
if __name__ == '__main__':

View File

@ -1,15 +1,15 @@
import re
ITALIC = re.compile(
r"(\*|_)(?P<text>.+?)\1")
r"(\*|_)(?P<text>.*?\S.*?)\1")
BOLD = re.compile(
r"(\*\*|__)(?P<text>.+?)\1")
r"(\*\*|__)(?P<text>.*?\S.*?)\1")
BOLD_ITALIC = re.compile(
r"((\*\*|__)([*_])|([*_])(\*\*|__))(?P<text>.+?)(?:\5\4|\3\2)")
r"((\*\*|__)([*_])|([*_])(\*\*|__))(?P<text>.*?\S.*?)(?:\5\4|\3\2)")
STRIKETHROUGH = re.compile(
r"~~(?P<text>.+?)~~")
r"~~(?P<text>.*?\S.*?)~~")
CODE = re.compile(
r"`(?P<text>[^`].*?)`")
r"`(?P<text>[^`].+?)`")
LINK = re.compile(
r"\[(?P<text>.*)\]\((?P<url>.+?)(?: \"(?P<title>.+)\")?\)")
IMAGE = re.compile(
@ -23,9 +23,9 @@ ORDERED_LIST = re.compile(
BLOCK_QUOTE = re.compile(
r"^ {0,3}(?:> ?)+(?P<text>.+)", re.M)
HEADER = re.compile(
r"^ {0,3}(?P<level>#{1,6})(?P<text>[^\n]+)", re.M)
r"^ {0,3}(?P<level>#{1,6}) (?P<text>[^\n]+)", re.M)
HEADER_UNDER = re.compile(
r"(?:^\n*|\n\n)(?P<text>[^\s].+)\n[=\-]+(?: +?\n|$)")
r"(?:^\n*|\n\n)(?P<text>[^\s].+)\n {0,3}[=\-]+(?: +?\n|$)")
CODE_BLOCK = re.compile(
r"(?:^|\n) {0,3}(?P<block>([`~]{3})(?P<text>.+?) {0,3}\2)(?:\s+?\n|$)", re.S)
TABLE = re.compile(