From 7c3d4d9364c41751e85d539b66f2aba9002e4b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Silva?= Date: Wed, 17 Jul 2019 23:50:09 +0100 Subject: [PATCH] Adjust regex tests for CommonMark, improve inline regexp --- ...test_regex.py => test_regex_commonmark.py} | 46 ++++++++++--------- uberwriter/markup_regex.py | 14 +++--- 2 files changed, 31 insertions(+), 29 deletions(-) rename tests/{test_regex.py => test_regex_commonmark.py} (69%) diff --git a/tests/test_regex.py b/tests/test_regex_commonmark.py similarity index 69% rename from tests/test_regex.py rename to tests/test_regex_commonmark.py index ec818fb..f8179e5 100644 --- a/tests/test_regex.py +++ b/tests/test_regex_commonmark.py @@ -22,10 +22,14 @@ from uberwriter import markup_regex class TestRegex(unittest.TestCase): - """Test cases from Windows CommunityToolkit - https://github.com/windows-toolkit/WindowsCommunityToolkit/blob/master/UnitTests/Markdown/Parse/ + """Test cases based on CommonMark's specs and demo: + - https://spec.commonmark.org/ + - https://spec.commonmark.org/dingus/ - TODO: use decorators. This needs decorators everywhere + CommonMark is the Markdown variant chosen as first-class. It's great and encouraged that + others are supported as well, but when in conflict or undecided, CommonMark should be picked. + + TODO: Use decorators. This needs decorators everywhere. 
""" def test_bold(self): @@ -36,39 +40,37 @@ class TestRegex(unittest.TestCase): "This is __bold__ text": "bold", "before**middle**end": "middle", "before** middle **end": " middle ", - "before******after": "**" + "empty * * bold": None } for test, result in test_texts.items(): with self.subTest(name=test): match = re.search(markup_regex.BOLD, test) if not match: - self.assertFalse(result) + self.assertFalse(result, msg=test) else: - self.assertEqual(match.group("text"), result) + self.assertEqual(match.group("text"), result, msg=test) def test_header(self): test_texts = { - "#Header 1": "Header 1", - "##Header 2": "Header 2", - "###Header 3": "Header 3", - "####Header 4": "Header 4", - "#####Header 5": "Header 5", - "######Header 6": "Header 6", - "#######Header 6": "#Header 6", - "#": "", - "## # # ##": "# #", - "#######": "", - "before\n#Header\nafter": "Header" + "# Header 1": "Header 1", + "## Header 2": "Header 2", + "### Header 3": "Header 3", + "#### Header 4": "Header 4", + "##### Header 5": "Header 5", + "###### Header 6": "Header 6", + "#": None, + "#######": None, + "before\n# Header\nafter": "Header" } for test, result in test_texts.items(): with self.subTest(name=test): match = re.search(markup_regex.HEADER, test) if not match: - self.assertFalse(result) + self.assertFalse(result, msg=test) else: - self.assertEqual(match.group("text"), result) + self.assertEqual(match.group("text"), result, msg=test) def test_header_under(self): test_texts = { @@ -76,16 +78,16 @@ class TestRegex(unittest.TestCase): "Header 1##\n=": "Header 1##", "Header 2\n-- \n": "Header 2", "Header 1\n=f": None, - "Header 1\n =": None + "Header 1\n =": "Header 1" } for test, result in test_texts.items(): with self.subTest(name=test): match = re.search(markup_regex.HEADER_UNDER, test) if not match: - self.assertFalse(result) + self.assertFalse(result, msg=test) else: - self.assertEqual(match.group("text"), result) + self.assertEqual(match.group("text"), result, msg=test) if __name__ 
== '__main__': diff --git a/uberwriter/markup_regex.py b/uberwriter/markup_regex.py index ace576e..e7da9e7 100644 --- a/uberwriter/markup_regex.py +++ b/uberwriter/markup_regex.py @@ -1,15 +1,15 @@ import re ITALIC = re.compile( - r"(\*|_)(?P<text>.+?)\1") + r"(\*|_)(?P<text>.*?\S.*?)\1") BOLD = re.compile( - r"(\*\*|__)(?P<text>.+?)\1") + r"(\*\*|__)(?P<text>.*?\S.*?)\1") BOLD_ITALIC = re.compile( - r"((\*\*|__)([*_])|([*_])(\*\*|__))(?P<text>.+?)(?:\5\4|\3\2)") + r"((\*\*|__)([*_])|([*_])(\*\*|__))(?P<text>.*?\S.*?)(?:\5\4|\3\2)") STRIKETHROUGH = re.compile( - r"~~(?P<text>.+?)~~") + r"~~(?P<text>.*?\S.*?)~~") CODE = re.compile( - r"`(?P<text>[^`].*?)`") + r"`(?P<text>[^`].+?)`") LINK = re.compile( r"\[(?P<text>.*)\]\((?P<url>.+?)(?: \"(?P<title>.+)\")?\)") IMAGE = re.compile( @@ -23,9 +23,9 @@ ORDERED_LIST = re.compile( BLOCK_QUOTE = re.compile( r"^ {0,3}(?:> ?)+(?P<text>.+)", re.M) HEADER = re.compile( - r"^ {0,3}(?P<level>#{1,6})(?P<text>[^\n]+)", re.M) + r"^ {0,3}(?P<level>#{1,6}) (?P<text>[^\n]+)", re.M) HEADER_UNDER = re.compile( - r"(?:^\n*|\n\n)(?P<text>[^\s].+)\n[=\-]+(?: +?\n|$)") + r"(?:^\n*|\n\n)(?P<text>[^\s].+)\n {0,3}[=\-]+(?: +?\n|$)") CODE_BLOCK = re.compile( r"(?:^|\n) {0,3}(?P<block>([`~]{3})(?P<text>.+?) {0,3}\2)(?:\s+?\n|$)", re.S) TABLE = re.compile(