From 05cdfe05998fecc9048f36286ab10215d75bd553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Silva?= Date: Wed, 24 Jul 2019 23:33:37 +0100 Subject: [PATCH] Use CommonMark's regexp for autolink / autoemail Ref: https://github.com/commonmark/commonmark.js/blob/master/lib/inlines.js#L62-L64 Also avoids special handling in stats counter. --- uberwriter/markup_regex.py | 2 +- uberwriter/stats_counter.py | 9 ++------- uberwriter/text_view_markup_handler.py | 9 +++++---- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/uberwriter/markup_regex.py b/uberwriter/markup_regex.py index d0fa219..57f5fcb 100644 --- a/uberwriter/markup_regex.py +++ b/uberwriter/markup_regex.py @@ -13,7 +13,7 @@ CODE = re.compile( LINK = re.compile( r"\[(?P<text>.*)\]\((?P<url>.+?)(?: \"(?P<title>.+)\")?\)") LINK_ALT = re.compile( - r"(?:<)(?P<url>https?://[^\s]+)(?:>)") + r"<(?P<text>[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*|(?:[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*))>") IMAGE = re.compile( r"!\[(?P<text>.*)\]\((?P<url>.+?)(?: \"(?P<title>.+)\")?\)") HORIZONTAL_RULE = re.compile( diff --git a/uberwriter/stats_counter.py b/uberwriter/stats_counter.py index 8c1feab..8c1e342 100644 --- a/uberwriter/stats_counter.py +++ b/uberwriter/stats_counter.py @@ -27,11 +27,8 @@ class StatsCounter: # List of regexp whose matches should be replaced by their "text" group. Order is important. MARKUP_REGEXP_REPLACE = ( - BOLD_ITALIC, ITALIC, BOLD, STRIKETHROUGH, IMAGE, LINK, LIST, ORDERED_LIST, BLOCK_QUOTE, - HEADER, HEADER_UNDER, CODE_BLOCK, TABLE, MATH, FOOTNOTE_ID, FOOTNOTE - ) - MARKUP_REGEXP_URL_REPLACE = ( - LINK_ALT, + BOLD_ITALIC, ITALIC, BOLD, STRIKETHROUGH, IMAGE, LINK, LINK_ALT, LIST, ORDERED_LIST, + BLOCK_QUOTE, HEADER, HEADER_UNDER, CODE_BLOCK, TABLE, MATH, FOOTNOTE_ID, FOOTNOTE ) # List of regexp whose matches should be removed. Order is important. 
@@ -78,8 +75,6 @@ class StatsCounter: child_conn.close() return - for regexp in self.MARKUP_REGEXP_URL_REPLACE: - text = re.sub(regexp, r"\g<url>", text) for regexp in self.MARKUP_REGEXP_REPLACE: text = re.sub(regexp, r"\g<text>", text) for regexp in self.MARKUP_REGEXP_REMOVE: diff --git a/uberwriter/text_view_markup_handler.py b/uberwriter/text_view_markup_handler.py index 57bab60..e0089bc 100644 --- a/uberwriter/text_view_markup_handler.py +++ b/uberwriter/text_view_markup_handler.py @@ -20,8 +20,9 @@ from multiprocessing import Pipe, Process import gi from uberwriter import helpers, markup_regex -from uberwriter.markup_regex import STRIKETHROUGH, BOLD_ITALIC, BOLD, ITALIC, IMAGE, LINK, LINK_ALT, \ - HORIZONTAL_RULE, LIST, ORDERED_LIST, BLOCK_QUOTE, HEADER, HEADER_UNDER, TABLE, MATH, CODE +from uberwriter.markup_regex import STRIKETHROUGH, BOLD_ITALIC, BOLD, ITALIC, IMAGE, LINK,\ + LINK_ALT, HORIZONTAL_RULE, LIST, ORDERED_LIST, BLOCK_QUOTE, HEADER, HEADER_UNDER, TABLE, MATH, \ + CODE gi.require_version('Gtk', '3.0') from gi.repository import Gtk, GLib @@ -203,11 +204,11 @@ class MarkupHandler: result.append((tag_name, (), match.start(), match.start("text"))) result.append((tag_name, (), match.end("text"), match.end())) - # - "<url>" (gray out) + # Find "<url>" links (gray out). matches = re.finditer(LINK_ALT, text) for match in matches: result.append(( - self.TAG_NAME_GRAY_TEXT, (), match.start("url"), match.end("url"))) + self.TAG_NAME_GRAY_TEXT, (), match.start("text"), match.end("text"))) # Find "---" horizontal rule (center). matches = re.finditer(HORIZONTAL_RULE, text)