Use CommonMark's regexps for autolinks and email autolinks

Ref: https://github.com/commonmark/commonmark.js/blob/master/lib/inlines.js#L62-L64

This also removes the need for a separate `<url>` replacement pass in the stats counter.
github/fork/yochananmarqos/patch-1
Gonçalo Silva 2019-07-24 23:33:37 +01:00
parent 63ff2659fc
commit 05cdfe0599
3 changed files with 8 additions and 12 deletions

View File

@ -13,7 +13,7 @@ CODE = re.compile(
LINK = re.compile(
r"\[(?P<text>.*)\]\((?P<url>.+?)(?: \"(?P<title>.+)\")?\)")
LINK_ALT = re.compile(
r"(?:<)(?P<url>https?://[^\s]+)(?:>)")
r"<(?P<text>[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*|(?:[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*))>")
IMAGE = re.compile(
r"!\[(?P<text>.*)\]\((?P<url>.+?)(?: \"(?P<title>.+)\")?\)")
HORIZONTAL_RULE = re.compile(

View File

@ -27,11 +27,8 @@ class StatsCounter:
# List of regexps whose matches should be replaced by their "text" group. Order is important.
MARKUP_REGEXP_REPLACE = (
BOLD_ITALIC, ITALIC, BOLD, STRIKETHROUGH, IMAGE, LINK, LIST, ORDERED_LIST, BLOCK_QUOTE,
HEADER, HEADER_UNDER, CODE_BLOCK, TABLE, MATH, FOOTNOTE_ID, FOOTNOTE
)
MARKUP_REGEXP_URL_REPLACE = (
LINK_ALT,
BOLD_ITALIC, ITALIC, BOLD, STRIKETHROUGH, IMAGE, LINK, LINK_ALT, LIST, ORDERED_LIST,
BLOCK_QUOTE, HEADER, HEADER_UNDER, CODE_BLOCK, TABLE, MATH, FOOTNOTE_ID, FOOTNOTE
)
# List of regexps whose matches should be removed. Order is important.
@ -78,8 +75,6 @@ class StatsCounter:
child_conn.close()
return
for regexp in self.MARKUP_REGEXP_URL_REPLACE:
text = re.sub(regexp, r"\g<url>", text)
for regexp in self.MARKUP_REGEXP_REPLACE:
text = re.sub(regexp, r"\g<text>", text)
for regexp in self.MARKUP_REGEXP_REMOVE:

View File

@ -20,8 +20,9 @@ from multiprocessing import Pipe, Process
import gi
from uberwriter import helpers, markup_regex
from uberwriter.markup_regex import STRIKETHROUGH, BOLD_ITALIC, BOLD, ITALIC, IMAGE, LINK, LINK_ALT, \
HORIZONTAL_RULE, LIST, ORDERED_LIST, BLOCK_QUOTE, HEADER, HEADER_UNDER, TABLE, MATH, CODE
from uberwriter.markup_regex import STRIKETHROUGH, BOLD_ITALIC, BOLD, ITALIC, IMAGE, LINK,\
LINK_ALT, HORIZONTAL_RULE, LIST, ORDERED_LIST, BLOCK_QUOTE, HEADER, HEADER_UNDER, TABLE, MATH, \
CODE
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, GLib
@ -203,11 +204,11 @@ class MarkupHandler:
result.append((tag_name, (), match.start(), match.start("text")))
result.append((tag_name, (), match.end("text"), match.end()))
# - "<url>" (gray out)
# Find "<url>" links (gray out).
matches = re.finditer(LINK_ALT, text)
for match in matches:
result.append((
self.TAG_NAME_GRAY_TEXT, (), match.start("url"), match.end("url")))
self.TAG_NAME_GRAY_TEXT, (), match.start("text"), match.end("text")))
# Find "---" horizontal rule (center).
matches = re.finditer(HORIZONTAL_RULE, text)