diff --git a/uberwriter/inline_preview.py b/uberwriter/inline_preview.py index 484ac0a..3ecec57 100644 --- a/uberwriter/inline_preview.py +++ b/uberwriter/inline_preview.py @@ -32,7 +32,7 @@ from uberwriter.text_view_markup_handler import MarkupHandler gi.require_version('Gtk', '3.0') from gi.repository import Gtk, Gdk, GdkPixbuf -from uberwriter import latex_to_PNG +from uberwriter import latex_to_PNG, markup_regex from uberwriter.settings import Settings from uberwriter.fix_table import FixTable @@ -179,7 +179,7 @@ def check_url(url, item, spinner): text = "Error! Reason: %s" % e.reason if not error: - if (response.code / 100) >= 4: + if response.code >= 400: LOGGER.debug("Website not available") text = _("Website is not available") else: @@ -362,15 +362,12 @@ class InlinePreview: text = self.text_buffer.get_text(start_iter, end_iter, False) - math = MarkupHandler.regex["MATH"] - link = MarkupHandler.regex["LINK"] - footnote = re.compile(r'\[\^([^\s]+?)\]') image = re.compile(r"!\[(.*?)\]\((.+?)\)") found_match = False - matches = re.finditer(math, text) + matches = re.finditer(markup_regex.MATH, text) for match in matches: LOGGER.debug(match.group(1)) if match.start() < line_offset < match.end(): @@ -400,7 +397,7 @@ class InlinePreview: if not found_match: # Links - matches = re.finditer(link, text) + matches = re.finditer(markup_regex.LINK, text) for match in matches: if match.start() < line_offset < match.end(): text = text[text.find("http://"):-1] diff --git a/uberwriter/markup_regex.py b/uberwriter/markup_regex.py new file mode 100644 index 0000000..a01e5fc --- /dev/null +++ b/uberwriter/markup_regex.py @@ -0,0 +1,30 @@ +import re + +ITALIC = re.compile( + r"(\*|_)(?P<text>.+?)\1") +BOLD = re.compile( + r"(\*\*|__)(?P<text>.+?)\1") +BOLD_ITALIC = re.compile( + r"(\*\*\*|___)(?P<text>.+?)\1") +STRIKETHROUGH = re.compile( + r"~~(?P<text>.+?)~~") +LINK = re.compile( + r"\[(?P<text>.*)\]\((?P<url>.+?)\)") +HORIZONTAL_RULE = re.compile( + r"(?:^\n*|\n\n)(?P<symbols>[ ]{0,3}[*\-_]{3,}[ ]*)(?:\n+|$)") +LIST
= re.compile( + r"(?:^\n*|\n\n)(?P<indent>(?:\t|[ ]{4})*)[\-*+]([ ]+)(?P<text>.+(?:\n+ \2.+)*)") +ORDERED_LIST = re.compile( + r"(?:^\n*|\n\n)(?P<indent>(?:\t|[ ]{4})*)(?P<prefix>(?:\d|[a-z])+[.)]) (?P<text>.+(?:\n+ {2}\2.+)*)") +BLOCK_QUOTE = re.compile( + r"^[ ]{0,3}(?:> ?)+(?P<text>.+)", re.M) +HEADER = re.compile( + r"^[ ]{0,3}(?P<level>#{1,6}) (?P<text>[^\n]+)", re.M) +HEADER_UNDER = re.compile( + r"(?:^\n*|\n\n)(?P<text>[^\s].+)\n[ ]{0,3}[=\-]+(?:\s+?\n|$)") +CODE_BLOCK = re.compile( + r"(?:^|\n)[ ]{0,3}(?P<block>([`~]{3})(?P<text>.+?)[ ]{0,3}\2)(?:\s+?\n|$)", re.S) +TABLE = re.compile( + r"^[\-+]{5,}\n(?P<text>.+?)\n[\-+]{5,}\n", re.S) +MATH = re.compile( + r"([$]{1,2})[^` ](?P<text>.+?)[^`\\ ]\1") diff --git a/uberwriter/text_view_markup_handler.py b/uberwriter/text_view_markup_handler.py index a203a91..230e87b 100644 --- a/uberwriter/text_view_markup_handler.py +++ b/uberwriter/text_view_markup_handler.py @@ -19,7 +19,7 @@ import re import gi from gi.overrides import GLib -from uberwriter import helpers +from uberwriter import helpers, markup_regex gi.require_version('Gtk', '3.0') from gi.repository import Gtk @@ -30,24 +30,6 @@ class MarkupHandler: # Maximum number of characters for which to markup synchronously. max_char_sync = 100000 - # Regular expressions for various markdown constructs.
- regex = { - "ITALIC": re.compile(r"(\*|_)(.+?)\1"), - "BOLD": re.compile(r"(\*\*|__)(.+?)\1"), - "BOLDITALIC": re.compile(r"(\*\*\*|___)(.+?)\1"), - "STRIKETHROUGH": re.compile(r"~~.+?~~"), - "LINK": re.compile(r"(\[).*(\]\(.+?\))"), - "HORIZONTALRULE": re.compile(r"\n\n([ ]{0,3}[*\-_]{3,}[ ]*)\n\n", re.MULTILINE), - "LIST": re.compile(r"^((?:\t|[ ]{4})*)[\-*+] .+", re.MULTILINE), - "NUMBEREDLIST": re.compile(r"^((?:\t|[ ]{4})*)((?:\d|[a-z])+[.)]) .+", re.MULTILINE), - "BLOCKQUOTE": re.compile(r"^[ ]{0,3}(?:>|(?:> )+).+", re.MULTILINE), - "HEADER": re.compile(r"^[ ]{0,3}(#{1,6}) [^\n]+", re.MULTILINE), - "HEADER_UNDER": re.compile(r"^\n[ ]{0,3}\w.+\n[ ]{0,3}[=\-]{3,}", re.MULTILINE), - "CODE": re.compile(r"(?:^|\n)[ ]{0,3}(([`~]{3}).+?[ ]{0,3}\2)(?:\n|$)", re.DOTALL), - "TABLE": re.compile(r"^[\-+]{5,}\n(.+?)\n[\-+]{5,}\n", re.DOTALL), - "MATH": re.compile(r"[$]{1,2}([^` ].+?[^`\\ ])[$]{1,2}"), - } - def __init__(self, text_view): self.text_view = text_view self.text_buffer = text_view.get_buffer() @@ -141,113 +123,115 @@ class MarkupHandler: buffer.remove_tag(self.graytext, start, end) # Apply "_italic_" tag (italic) - matches = re.finditer(self.regex["ITALIC"], text) + matches = re.finditer(markup_regex.ITALIC, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) buffer.apply_tag(self.italic, start_iter, end_iter) # Apply "**bold**" tag (bold) - matches = re.finditer(self.regex["BOLD"], text) + matches = re.finditer(markup_regex.BOLD, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) buffer.apply_tag(self.bold, start_iter, end_iter) # Apply "***bolditalic***" tag (bold/italic) - matches = re.finditer(self.regex["BOLDITALIC"], text) + matches = re.finditer(markup_regex.BOLD_ITALIC, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start())
end_iter = buffer.get_iter_at_offset(offset + match.end()) buffer.apply_tag(self.bolditalic, start_iter, end_iter) # Apply "~~strikethrough~~" tag (strikethrough) - matches = re.finditer(self.regex["STRIKETHROUGH"], text) + matches = re.finditer(markup_regex.STRIKETHROUGH, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) buffer.apply_tag(self.strikethrough, start_iter, end_iter) - matches = re.finditer(self.regex["LINK"], text) + # Apply "[description](url)" (gray out) + matches = re.finditer(markup_regex.LINK, text) for match in matches: - start_iter = buffer.get_iter_at_offset(offset + match.start(1)) - end_iter = buffer.get_iter_at_offset(offset + match.end(1)) + start_iter = buffer.get_iter_at_offset(offset + match.start("text") - 1) + end_iter = start_iter.copy() + end_iter.forward_char() buffer.apply_tag(self.graytext, start_iter, end_iter) - start_iter = buffer.get_iter_at_offset(offset + match.start(2)) - end_iter = buffer.get_iter_at_offset(offset + match.end(2)) + start_iter = buffer.get_iter_at_offset(offset + match.start("url") - 2) + end_iter = buffer.get_iter_at_offset(offset + match.end("url") + 1) buffer.apply_tag(self.graytext, start_iter, end_iter) # Apply "---" horizontal rule tag (center) - matches = re.finditer(self.regex["HORIZONTALRULE"], text) + matches = re.finditer(markup_regex.HORIZONTAL_RULE, text) for match in matches: - start_iter = buffer.get_iter_at_offset(offset + match.start(1)) - end_iter = buffer.get_iter_at_offset(offset + match.end(1)) + start_iter = buffer.get_iter_at_offset(offset + match.start("symbols")) + end_iter = buffer.get_iter_at_offset(offset + match.end("symbols")) buffer.apply_tag(self.horizontalrule, start_iter, end_iter) # Apply "* list" tag (offset) - matches = re.finditer(self.regex["LIST"], text) + matches = re.finditer(markup_regex.LIST, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset 
+ match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) # Lists use character+space (eg. "* ") length = 2 - nest = len(match.group(1).replace(" ", "\t")) + nest = len(match.group("indent").replace(" ", "\t")) margin = -length - 2 * nest indent = -length - 2 * length * nest buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter) - # Apply "1. numbered list" tag (offset) - matches = re.finditer(self.regex["NUMBEREDLIST"], text) + # Apply "1. ordered list" tag (offset) + matches = re.finditer(markup_regex.ORDERED_LIST, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) # Numeric lists use numbers/letters+dot/parens+space (eg. "123. ") - length = len(match.group(2)) + 1 - nest = len(match.group(1).replace(" ", "\t")) + length = len(match.group("prefix")) + 1 + nest = len(match.group("indent").replace(" ", "\t")) margin = -length - 2 * nest indent = -length - 2 * length * nest buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter) # Apply "> blockquote" tag (offset) - matches = re.finditer(self.regex["BLOCKQUOTE"], text) + matches = re.finditer(markup_regex.BLOCK_QUOTE, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) buffer.apply_tag(self.get_margin_indent_tag(2, -2), start_iter, end_iter) # Apply "#" tag (offset + bold) - matches = re.finditer(self.regex["HEADER"], text) + matches = re.finditer(markup_regex.HEADER, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) - margin = -len(match.group(1)) - 1 + margin = -len(match.group("level")) - 1 buffer.apply_tag(self.get_margin_indent_tag(margin, 0), start_iter, end_iter) buffer.apply_tag(self.bold, start_iter, end_iter) # Apply "======" header underline 
tag (bold) - matches = re.finditer(self.regex["HEADER_UNDER"], text) + matches = re.finditer(markup_regex.HEADER_UNDER, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end()) buffer.apply_tag(self.bold, start_iter, end_iter) # Apply "```" code tag (offset) - matches = re.finditer(self.regex["CODE"], text) + matches = re.finditer(markup_regex.CODE_BLOCK, text) for match in matches: - start_iter = buffer.get_iter_at_offset(offset + match.start(1)) - end_iter = buffer.get_iter_at_offset(offset + match.end(1)) + start_iter = buffer.get_iter_at_offset(offset + match.start("block")) + end_iter = buffer.get_iter_at_offset(offset + match.end("block")) buffer.apply_tag(self.get_margin_indent_tag(0, 2), start_iter, end_iter) buffer.apply_tag(self.plaintext, start_iter, end_iter) - # Apply "---" table tag (wrap/pixels) - matches = re.finditer(self.regex["TABLE"], text) - for match in matches: - start_iter = buffer.get_iter_at_offset(offset + match.start()) - end_iter = buffer.get_iter_at_offset(offset + match.end()) - buffer.apply_tag(self.table, start_iter, end_iter) + # # Apply "---" table tag (wrap/pixels) + # matches = re.finditer(markup_regex.TABLE, text) + # for match in matches: + # start_iter = buffer.get_iter_at_offset(offset + match.start()) + # end_iter = buffer.get_iter_at_offset(offset + match.end()) + # buffer.apply_tag(self.table, start_iter, end_iter) # Apply "$math$" tag (colorize) - matches = re.finditer(self.regex["MATH"], text) + matches = re.finditer(markup_regex.MATH, text) for match in matches: start_iter = buffer.get_iter_at_offset(offset + match.start()) end_iter = buffer.get_iter_at_offset(offset + match.end())