Add support for code blocks, improve overall markup handling

This commit adds markup support for code blocks, styling them in a
conservative manner, similar to blockquotes, solely indenting them.

Partially fixes #90

Code-wise, this means marking up only around the cursor becomes
considerably more complex, as a change in one line can affect multiple
lines. Solving that is non-trivial, so the whole document is now always
marked up.

Marking up the whole document has a negligible cost for small to medium
documents, but can incur a performance penalty for very large documents
(empirical testing: 1M characters takes ~0.15s).
To alleviate this, GLib.idle_add is used to ensure that markup is only
parsed and applied when the UI is idle. Again, small to medium-sized
documents see no difference. For very large documents, markup will be
slightly delayed to allow for a fluid typing experience.

It's important to note that the previous flows already used full
document markup frequently: paste, focus mode, and search and replace.
In some extreme cases the document was even parsed twice (e.g. paste +
text change). For very large documents, doing any of these actions would
freeze the UI unconditionally, so in more ways than one this is an
upgrade.

Lastly, this is admittedly a little overzealous: with over 1M characters,
the UI itself struggles more than the parsing does.

In sum:
* Markup is always applied to the whole document
* The code is simpler
* There is never double work
* Markup is applied when the UI is idle, for a smoother experience
* Multi-line formatting is now possible to do reliably
ft.font-size^2
Gonçalo Silva 2019-04-10 02:59:00 +01:00
parent c5d2322b96
commit 86c924972b
2 changed files with 115 additions and 67 deletions

View File

@ -71,7 +71,6 @@ class TextView(Gtk.TextView):
# Markup
self.markup = MarkupHandler(self)
self.connect('style-updated', self.markup.on_style_updated)
self.get_buffer().connect('paste-done', self.on_paste_done)
# Preview popover
self.preview_popover = InlinePreview(self)
@ -102,18 +101,14 @@ class TextView(Gtk.TextView):
def set_text(self, text):
text_buffer = self.get_buffer()
text_buffer.set_text(text)
self.markup.apply() # TODO
def on_text_changed(self, *_):
self.markup.apply(True)
self.scroll_to()
def on_paste_done(self, *_):
self.markup.apply()
self.scroll_to()
def on_size_allocate(self, *_):
self.update_vertical_margin()
self.markup.update_margins()
self.markup.update_margins_indents()
def set_focus_mode(self, focus_mode):
"""Toggle focus mode.
@ -137,7 +132,7 @@ class TextView(Gtk.TextView):
def on_button_release_event(self, _widget, _event):
if self.focus_mode:
self.markup.apply(True)
self.markup.apply()
return False
def set_hemingway_mode(self, hemingway_mode):
@ -196,11 +191,9 @@ class TextView(Gtk.TextView):
def on_mark_set(self, _text_buffer, _location, mark, _data=None):
if mark.get_name() == 'insert':
self.markup.apply()
if self.focus_mode:
self.scroll_to(mark)
self.markup.apply(False)
else:
self.markup.apply(True)
elif mark.get_name() == 'gtk_drag_target':
self.scroll_to(mark)
return True

View File

@ -15,7 +15,9 @@
### END LICENSE
import re
import gi
from gi.overrides import GLib
from uberwriter import helpers
@ -25,19 +27,25 @@ from gi.repository import Pango
class MarkupHandler:
# Maximum number of characters for which to markup synchronously.
max_char_sync = 100000
# Regular expressions for various markdown constructs.
regex = {
"ITALIC": re.compile(r"(\*|_)(.*?)\1"),
"BOLD": re.compile(r"(\*\*|__)(.*?)\1"),
"BOLDITALIC": re.compile(r"(\*\*\*|___)(.*?)\1"),
"STRIKETHROUGH": re.compile(r"~~[^ `~\n].+?~~"),
"LIST": re.compile(r"^((?:\t|[ ]{4})*)[\-\*\+][ ].+", re.MULTILINE),
"NUMBEREDLIST": re.compile(r"^((?:\t|[ ]{4})*)((?:\d|[a-z])+[\.\)])[ ].+", re.MULTILINE),
"BLOCKQUOTE": re.compile(r"^[ ]{0,3}(?:\>|(?:\> )+).+", re.MULTILINE),
"HEADER": re.compile(r"^[ ]{0,3}(#{1,6}) .+", re.MULTILINE),
"HEADER_UNDER": re.compile(r"^[ ]{0,3}\w.+\n[ ]{0,3}[\=\-]{3,}", re.MULTILINE),
"HORIZONTALRULE": re.compile(r"^\n([ ]{0,3}[\*\-_]{3,}[ ]*)\n", re.MULTILINE),
"TABLE": re.compile(r"^[\-\+]{5,}\n(.+?)\n[\-\+]{5,}\n", re.DOTALL),
"MATH": re.compile(r"[\$]{1,2}([^` ].+?[^`\\ ])[\$]{1,2}"),
"ITALIC": re.compile(r"(\*|_)(.+?)\1"),
"BOLD": re.compile(r"(\*\*|__)(.+?)\1"),
"BOLDITALIC": re.compile(r"(\*\*\*|___)(.+?)\1"),
"STRIKETHROUGH": re.compile(r"~~.+?~~"),
"HORIZONTALRULE": re.compile(r"\n\n([ ]{0,3}[*\-_]{3,}[ ]*)\n", re.MULTILINE),
"LIST": re.compile(r"^((?:\t|[ ]{4})*)[\-*+] .+", re.MULTILINE),
"NUMERICLIST": re.compile(r"^((\d|[a-z]|#)+[.)]) ", re.MULTILINE),
"NUMBEREDLIST": re.compile(r"^((?:\t|[ ]{4})*)((?:\d|[a-z])+[.)]) .+", re.MULTILINE),
"BLOCKQUOTE": re.compile(r"^[ ]{0,3}(?:>|(?:> )+).+", re.MULTILINE),
"HEADER": re.compile(r"^[ ]{0,3}(#{1,6}) [^\n]+", re.MULTILINE),
"HEADER_UNDER": re.compile(r"^[ ]{0,3}\w.+\n[ ]{0,3}[=\-]{3,}", re.MULTILINE),
"CODE": re.compile(r"(?:^|\n)[ ]{0,3}(([`~]{3}).+?[ ]{0,3}\2)(?:\n|$)", re.DOTALL),
"TABLE": re.compile(r"^[\-+]{5,}\n(.+?)\n[\-+]{5,}\n", re.DOTALL),
"MATH": re.compile(r"[$]{1,2}([^` ].+?[^`\\ ])[$]{1,2}"),
}
def __init__(self, text_view):
@ -46,6 +54,7 @@ class MarkupHandler:
# Styles
buffer = self.text_buffer
self.italic = buffer.create_tag('italic',
weight=Pango.Weight.NORMAL,
style=Pango.Style.ITALIC)
@ -58,13 +67,16 @@ class MarkupHandler:
weight=Pango.Weight.BOLD,
style=Pango.Style.ITALIC)
self.graytext = buffer.create_tag('graytext', foreground='gray')
self.strikethrough = buffer.create_tag('strikethrough', strikethrough=True)
self.centertext = buffer.create_tag('centertext', justification=Gtk.Justification.CENTER)
self.horizontalrule = buffer.create_tag('centertext',
justification=Gtk.Justification.CENTER)
self.invisible = buffer.create_tag('invisible', invisible=True)
self.plaintext = buffer.create_tag('plaintext',
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL,
strikethrough=False,
justification=Gtk.Justification.LEFT)
self.table = buffer.create_tag('table')
self.table.set_property('wrap-mode', Gtk.WrapMode.NONE)
@ -73,34 +85,43 @@ class MarkupHandler:
self.mathtext = buffer.create_tag('mathtext')
# Margins
# A default margin is set to allow negative indents for formatting headers, lists, etc
self.graytext = buffer.create_tag('graytext',
foreground='gray',
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL)
# Margin and indents
# A baseline margin is set to allow negative offsets for formatting headers, lists, etc
self.baseline_margin = 0
self.margins = {}
self.update_margins()
self.margins_indents = {}
self.update_margins_indents()
# Style
self.on_style_updated()
self.version = 0
def on_style_updated(self, *_):
(found, color) = self.text_view.get_style_context().lookup_color('math_text_color')
if not found:
(_, color) = self.text_view.get_style_context().lookup_color('foreground_color')
self.mathtext.set_property("foreground", color.to_string())
def apply(self, around_cursor=False):
buffer = self.text_buffer
if around_cursor:
cursor_mark = buffer.get_insert()
start = buffer.get_iter_at_mark(cursor_mark)
start.backward_lines(3)
end = buffer.get_iter_at_mark(cursor_mark)
end.forward_lines(2)
offset = start.get_offset()
def apply(self):
self.version = self.version + 1
if self.text_buffer.get_char_count() < self.max_char_sync:
self.do_apply()
else:
start = buffer.get_start_iter()
end = buffer.get_end_iter()
offset = 0
GLib.idle_add(self.do_apply, self.version)
def do_apply(self, version=None):
if version is not None and version != self.version:
return
buffer = self.text_buffer
start = buffer.get_start_iter()
end = buffer.get_end_iter()
offset = 0
text = buffer.get_slice(start, end, False)
@ -109,11 +130,14 @@ class MarkupHandler:
buffer.remove_tag(self.bold, start, end)
buffer.remove_tag(self.bolditalic, start, end)
buffer.remove_tag(self.strikethrough, start, end)
buffer.remove_tag(self.horizontalrule, start, end)
buffer.remove_tag(self.plaintext, start, end)
buffer.remove_tag(self.table, start, end)
buffer.remove_tag(self.mathtext, start, end)
buffer.remove_tag(self.centertext, start, end)
for tag in self.margins.values():
for tag in self.margins_indents.values():
buffer.remove_tag(tag, start, end)
buffer.remove_tag(self.graytext, start, end)
buffer.remove_tag(self.graytext, start, end)
# Apply "_italic_" tag (italic)
matches = re.finditer(self.regex["ITALIC"], text)
@ -143,15 +167,24 @@ class MarkupHandler:
end_iter = buffer.get_iter_at_offset(offset + match.end())
buffer.apply_tag(self.strikethrough, start_iter, end_iter)
# Apply "---" horizontal rule tag (center)
matches = re.finditer(self.regex["HORIZONTALRULE"], text)
for match in matches:
start_iter = buffer.get_iter_at_offset(offset + match.start(1))
end_iter = buffer.get_iter_at_offset(offset + match.end(1))
buffer.apply_tag(self.horizontalrule, start_iter, end_iter)
# Apply "* list" tag (offset)
matches = re.finditer(self.regex["LIST"], text)
for match in matches:
start_iter = buffer.get_iter_at_offset(offset + match.start())
end_iter = buffer.get_iter_at_offset(offset + match.end())
# Lists use character+space (eg. "* ")
indent = 2
length = 2
nest = len(match.group(1).replace(" ", "\t"))
buffer.apply_tag(self.get_margin(-indent - 2 * nest), start_iter, end_iter)
margin = -length - 2 * nest
indent = -length - 2 * length * nest
buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter)
# Apply "1. numbered list" tag (offset)
matches = re.finditer(self.regex["NUMBEREDLIST"], text)
@ -159,24 +192,26 @@ class MarkupHandler:
start_iter = buffer.get_iter_at_offset(offset + match.start())
end_iter = buffer.get_iter_at_offset(offset + match.end())
# Numeric lists use numbers/letters+dot/parens+space (eg. "123. ")
indent = len(match.group(2)) + 1
length = len(match.group(2)) + 1
nest = len(match.group(1).replace(" ", "\t"))
buffer.apply_tag(self.get_margin(-indent - 2 * nest), start_iter, end_iter)
margin = -length - 2 * nest
indent = -length - 2 * length * nest
buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter)
# Apply "> blockquote" tag (offset)
matches = re.finditer(self.regex["BLOCKQUOTE"], text)
for match in matches:
start_iter = buffer.get_iter_at_offset(offset + match.start())
end_iter = buffer.get_iter_at_offset(offset + match.end())
buffer.apply_tag(self.get_margin(2), start_iter, end_iter)
buffer.apply_tag(self.get_margin_indent_tag(2, -2), start_iter, end_iter)
# Apply "#" tag (offset + bold)
matches = re.finditer(self.regex["HEADER"], text)
for match in matches:
start_iter = buffer.get_iter_at_offset(offset + match.start())
end_iter = buffer.get_iter_at_offset(offset + match.end())
indent = -len(match.group(1)) - 1
buffer.apply_tag(self.get_margin(indent), start_iter, end_iter)
margin = -len(match.group(1)) - 1
buffer.apply_tag(self.get_margin_indent_tag(margin, 0), start_iter, end_iter)
buffer.apply_tag(self.bold, start_iter, end_iter)
# Apply "======" header underline tag (bold)
@ -186,12 +221,13 @@ class MarkupHandler:
end_iter = buffer.get_iter_at_offset(offset + match.end())
buffer.apply_tag(self.bold, start_iter, end_iter)
# Apply "---" horizontal rule tag (center)
matches = re.finditer(self.regex["HORIZONTALRULE"], text)
# Apply "```" code tag (offset)
matches = re.finditer(self.regex["CODE"], text)
for match in matches:
start_iter = buffer.get_iter_at_offset(offset + match.start(1))
end_iter = buffer.get_iter_at_offset(offset + match.end(1))
buffer.apply_tag(self.centertext, start_iter, end_iter)
buffer.apply_tag(self.get_margin_indent_tag(0, 2), start_iter, end_iter)
buffer.apply_tag(self.plaintext, start_iter, end_iter)
# Apply "---" table tag (wrap/pixels)
matches = re.finditer(self.regex["TABLE"], text)
@ -219,20 +255,37 @@ class MarkupHandler:
if end.compare(end_sentence) >= 0:
buffer.apply_tag(self.graytext, end_sentence, end)
def get_margin(self, level):
if level not in self.margins:
char_width = helpers.get_char_width(self.text_view)
tag = self.text_buffer.create_tag("indent_" + str(level))
tag.set_property("left-margin", max(self.baseline_margin + char_width * level, 0))
self.margins[level] = tag
return self.margins[level]
# Margin and indent are cumulative. They differ in two ways:
# * Margin is always in the beginning, which means it effectively only affects the first line
# of multi-line text. Indent is applied to every line.
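# * Margin level can be negative, as a baseline margin exists from which it can be subtracted
# (the resulting margin is clamped at 0). Indent level can be negative as well: lists and
# blockquotes pass negative indent levels, and the indent is not clamped.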
def get_margin_indent_tag(self, margin_level, indent_level):
level = (margin_level, indent_level)
if level not in self.margins_indents:
tag = self.text_buffer.create_tag(
"margin_indent_" + str(margin_level) + "_" + str(indent_level))
margin, indent = self.get_margin_indent(margin_level, indent_level)
tag.set_property("left-margin", margin)
tag.set_property("indent", indent)
self.margins_indents[level] = tag
return tag
else:
return self.margins_indents[level]
def update_margins(self):
def get_margin_indent(self, margin_level, indent_level, char_width=None):
if char_width is None:
char_width = helpers.get_char_width(self.text_view)
margin = max(self.baseline_margin + char_width * margin_level, 0)
indent = char_width * indent_level
return margin, indent
def update_margins_indents(self):
char_width = helpers.get_char_width(self.text_view)
# Adjust tab size, as character width can change
tab_array = Pango.TabArray.new(1, True)
tab_array.set_tab(0, Pango.TabAlign.LEFT, 2 * char_width)
tab_array.set_tab(0, Pango.TabAlign.LEFT, 4 * char_width)
self.text_view.set_tabs(tab_array)
# Adjust baseline margin, as character width can change
@ -241,6 +294,8 @@ class MarkupHandler:
self.text_view.set_left_margin(self.baseline_margin)
self.text_view.set_right_margin(self.baseline_margin)
# Adjust left margins, as character width can change
for level, tag in self.margins.items():
tag.set_property("left-margin", max(self.baseline_margin + char_width * level, 0))
# Adjust margins and indents, as character width can change
for level, tag in self.margins_indents.items():
margin, indent = self.get_margin_indent(*level, char_width)
tag.set_property("left-margin", margin)
tag.set_property("indent", indent)