Move markup to a worker process

A worker thread works in practice, but the GIL takes a significant toll
as the code is computationally heavy. The result is a hogged UI,
specially when other threads are involved (eg. stats counter).

A worker process is faster, hogs the UI significantly less, at the cost
of slightly higher memory usage.

As side-effects:
- UberWriter can now comfortably handle much larger documents
- Text selection is much more responsive
github/fork/yochananmarqos/patch-1
Gonçalo Silva 2019-06-09 03:29:09 +01:00
parent bb279d0379
commit 931d92bdfd
2 changed files with 262 additions and 189 deletions

View File

@ -1,12 +1,11 @@
import gi
from gi.repository.GObject import SignalMatchType
from uberwriter.inline_preview import InlinePreview
from uberwriter.text_view_drag_drop_handler import DragDropHandler, TARGET_URI, TARGET_TEXT
from uberwriter.text_view_format_inserter import FormatInserter
from uberwriter.text_view_markup_handler import MarkupHandler
from uberwriter.text_view_scroller import TextViewScroller
from uberwriter.text_view_undo_redo_handler import UndoRedoHandler
from uberwriter.text_view_drag_drop_handler import DragDropHandler, TARGET_URI, TARGET_TEXT
gi.require_version('Gtk', '3.0')
gi.require_version('Gspell', '1')
@ -88,6 +87,7 @@ class TextView(Gtk.TextView):
# Markup
self.markup = MarkupHandler(self)
self.connect('style-updated', self.markup.on_style_updated)
self.connect('destroy', self.markup.stop)
# Preview popover
self.preview_popover = InlinePreview(self)

View File

@ -15,237 +15,307 @@
### END LICENSE
import re
from multiprocessing import Pipe, Process
import gi
from gi.overrides import GLib
from uberwriter import helpers, markup_regex
from uberwriter.markup_regex import STRIKETHROUGH, BOLD_ITALIC, BOLD, ITALIC, IMAGE, LINK, \
HORIZONTAL_RULE, LIST, ORDERED_LIST, BLOCK_QUOTE, HEADER, HEADER_UNDER, TABLE, MATH
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk
from gi.repository import Gtk, GLib
from gi.repository import Pango
class MarkupHandler:
# Maximum number of characters for which to markup synchronously.
max_char_sync = 100000
TAG_NAME_ITALIC = 'italic'
TAG_NAME_BOLD = 'bold'
TAG_NAME_BOLD_ITALIC = 'bold_italic'
TAG_NAME_STRIKETHROUGH = 'strikethrough'
TAG_NAME_CENTER = 'center'
TAG_NAME_WRAP_NONE = 'wrap_none'
TAG_NAME_PLAIN_TEXT = 'plain_text'
TAG_NAME_GRAY_TEXT = 'gray_text'
TAG_NAME_MATH_TEXT = 'math_text'
TAG_NAME_UNFOCUSED_TEXT = 'unfocused_text'
TAG_NAME_MARGIN_INDENT = 'margin_indent'
def __init__(self, text_view):
self.text_view = text_view
self.text_buffer = text_view.get_buffer()
self.marked_up_text = None
# Styles
# Tags.
buffer = self.text_buffer
self.italic = buffer.create_tag('italic',
weight=Pango.Weight.NORMAL,
style=Pango.Style.ITALIC)
self.bold = buffer.create_tag('bold',
weight=Pango.Weight.BOLD,
style=Pango.Style.NORMAL)
self.bolditalic = buffer.create_tag('bolditalic',
weight=Pango.Weight.BOLD,
self.tag_italic = buffer.create_tag(self.TAG_NAME_ITALIC,
weight=Pango.Weight.NORMAL,
style=Pango.Style.ITALIC)
self.strikethrough = buffer.create_tag('strikethrough', strikethrough=True)
self.horizontalrule = buffer.create_tag('centertext',
justification=Gtk.Justification.CENTER)
self.plaintext = buffer.create_tag('plaintext',
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL,
strikethrough=False,
justification=Gtk.Justification.LEFT)
self.table = buffer.create_tag('table',
wrap_mode=Gtk.WrapMode.NONE,
pixels_above_lines=0,
pixels_below_lines=0)
self.mathtext = buffer.create_tag('mathtext',
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL,
strikethrough=False)
self.graytext = buffer.create_tag('graytext',
foreground='gray',
weight=Pango.Weight.NORMAL,
self.tag_bold = buffer.create_tag(self.TAG_NAME_BOLD,
weight=Pango.Weight.BOLD,
style=Pango.Style.NORMAL)
# Margin and indents
# A baseline margin is set to allow negative offsets for formatting headers, lists, etc
self.margins_indents = {}
self.tag_bold_italic = buffer.create_tag(self.TAG_NAME_BOLD_ITALIC,
weight=Pango.Weight.BOLD,
style=Pango.Style.ITALIC)
self.tag_strikethrough = buffer.create_tag(self.TAG_NAME_STRIKETHROUGH,
strikethrough=True)
self.tag_center = buffer.create_tag(self.TAG_NAME_CENTER,
justification=Gtk.Justification.CENTER)
self.tag_wrap_none = buffer.create_tag(self.TAG_NAME_WRAP_NONE,
wrap_mode=Gtk.WrapMode.NONE,
pixels_above_lines=0,
pixels_below_lines=0)
self.tag_plain_text = buffer.create_tag(self.TAG_NAME_PLAIN_TEXT,
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL,
strikethrough=False,
justification=Gtk.Justification.LEFT)
self.tag_gray_text = buffer.create_tag(self.TAG_NAME_GRAY_TEXT,
foreground='gray',
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL)
self.tag_math_text = buffer.create_tag(self.TAG_NAME_MATH_TEXT,
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL,
strikethrough=False)
self.tags_markup = {
self.TAG_NAME_ITALIC: lambda args: self.tag_italic,
self.TAG_NAME_BOLD: lambda args: self.tag_bold,
self.TAG_NAME_BOLD_ITALIC: lambda args: self.tag_bold_italic,
self.TAG_NAME_STRIKETHROUGH: lambda args: self.tag_strikethrough,
self.TAG_NAME_CENTER: lambda args: self.tag_center,
self.TAG_NAME_WRAP_NONE: lambda args: self.tag_wrap_none,
self.TAG_NAME_PLAIN_TEXT: lambda args: self.tag_plain_text,
self.TAG_NAME_GRAY_TEXT: lambda args: self.tag_gray_text,
self.TAG_NAME_MATH_TEXT: lambda args: self.tag_math_text,
self.TAG_NAME_MARGIN_INDENT: lambda args: self.get_margin_indent_tag(*args)
}
# Focus mode.
self.tag_unfocused_text = buffer.create_tag(self.TAG_NAME_UNFOCUSED_TEXT,
foreground='gray',
weight=Pango.Weight.NORMAL,
style=Pango.Style.NORMAL)
# Margin and indents.
# A baseline margin is set to allow negative offsets for formatting headers, lists, etc.
self.tags_margins_indents = {}
self.baseline_margin = 0
self.char_width = 0
self.update_margins_indents()
# Style
# Style.
self.on_style_updated()
self.version = 0
# Worker process to handle parsing.
self.parsing = False
self.apply_pending = False
self.parent_conn, child_conn = Pipe()
Process(target=self.parse, args=(child_conn,), daemon=True).start()
GLib.io_add_watch(
self.parent_conn.fileno(), GLib.PRIORITY_DEFAULT, GLib.IO_IN, self.on_parsed)
def on_style_updated(self, *_):
(found, color) = self.text_view.get_style_context().lookup_color('math_text_color')
if not found:
(_, color) = self.text_view.get_style_context().lookup_color('foreground_color')
self.mathtext.set_property("foreground", color.to_string())
self.tag_math_text.set_property("foreground", color.to_string())
def apply(self):
self.version = self.version + 1
if self.text_buffer.get_char_count() < self.max_char_sync:
self.do_apply()
else:
GLib.idle_add(self.do_apply, self.version)
"""Applies markup, parsing it in a worker process if the text has changed.
def do_apply(self, version=None):
if version is not None and version != self.version:
return
In case parsing is already running, it will re-apply once it finishes. This ensure that
the pipe doesn't fill (and block) if multiple requests are made in quick succession."""
if not self.parsing:
self.parsing = True
self.apply_pending = False
text = self.text_buffer.get_slice(
self.text_buffer.get_start_iter(), self.text_buffer.get_end_iter(), False)
if text != self.marked_up_text:
self.parent_conn.send(text)
else:
self.do_apply(text)
else:
self.apply_pending = True
def parse(self, child_conn):
"""Parses markup in a worker process."""
while True:
while True:
try:
text = child_conn.recv()
if not child_conn.poll():
break
except EOFError:
child_conn.close()
return
# List of tuples in the form (tag_name, tag_args, tag_start, tag_end).
result = []
# Find:
# - "_italic_" (italic)
# - "**bold**" (bold)
# - "***bolditalic***" (bold/italic)
# - "~~strikethrough~~" (strikethrough)
# - "$math$" (colorize)
# - "---" table (wrap/pixels)
regexps = (
(ITALIC, self.TAG_NAME_ITALIC),
(BOLD, self.TAG_NAME_BOLD),
(BOLD_ITALIC, self.TAG_NAME_BOLD_ITALIC),
(STRIKETHROUGH, self.TAG_NAME_STRIKETHROUGH),
(MATH, self.TAG_NAME_MATH_TEXT),
(TABLE, self.TAG_NAME_WRAP_NONE)
)
for regexp, tag_name in regexps:
matches = re.finditer(regexp, text)
for match in matches:
result.append((tag_name, (), match.start(), match.end()))
# Find:
# - "[description](url)" (gray out)
# - "![description](image_url)" (gray out)
regexps = (
(LINK, self.TAG_NAME_GRAY_TEXT),
(IMAGE, self.TAG_NAME_GRAY_TEXT)
)
for regexp, tag_name in regexps:
matches = re.finditer(regexp, text)
for match in matches:
result.append((tag_name, (), match.start(), match.start("text")))
result.append((tag_name, (), match.end("text"), match.end()))
# Find "---" horizontal rule (center).
matches = re.finditer(HORIZONTAL_RULE, text)
for match in matches:
result.append((
self.TAG_NAME_CENTER, (), match.start("symbols"), match.end("symbols")))
# Find "* list" (offset).
matches = re.finditer(LIST, text)
for match in matches:
# Lists use character+space (eg. "* ").
length = 2
nest = len(match.group("indent").replace(" ", "\t"))
margin = -length - 2 * nest
indent = -length - 2 * length * nest
result.append((
self.TAG_NAME_MARGIN_INDENT,
(margin, indent),
match.start("content"),
match.end("content")
))
# Find "1. ordered list" (offset).
matches = re.finditer(ORDERED_LIST, text)
for match in matches:
# Ordered lists use numbers/letters+dot/parens+space (eg. "123. ").
length = len(match.group("prefix")) + 1
nest = len(match.group("indent").replace(" ", "\t"))
margin = -length - 2 * nest
indent = -length - 2 * length * nest
result.append((
self.TAG_NAME_MARGIN_INDENT,
(margin, indent),
match.start("content"),
match.end("content")
))
# Find "> blockquote" (offset).
matches = re.finditer(BLOCK_QUOTE, text)
for match in matches:
result.append((self.TAG_NAME_MARGIN_INDENT, (2, -2), match.start(), match.end()))
# Find "# Header" (offset+bold).
matches = re.finditer(HEADER, text)
for match in matches:
margin = -len(match.group("level")) - 1
result.append(
(self.TAG_NAME_MARGIN_INDENT, (margin, 0), match.start(), match.end()))
result.append((self.TAG_NAME_BOLD, (), match.start(), match.end()))
# Find "=======" header underline (bold).
matches = re.finditer(HEADER_UNDER, text)
for match in matches:
result.append((self.TAG_NAME_BOLD, (), match.start(), match.end()))
# Find "```" code tag (offset+remove other markup).
matches = re.finditer(markup_regex.CODE_BLOCK, text)
for match in matches:
result.append((
self.TAG_NAME_MARGIN_INDENT, (0, 2), match.start("block"), match.end("block")))
result.append((
self.TAG_NAME_PLAIN_TEXT, (), match.start("block"), match.end("block")))
# Send parsed data back.
child_conn.send((text, result))
def on_parsed(self, _source, _condition):
"""Reads the parsing result from the pipe and triggers any pending apply."""
self.parsing = False
if self.apply_pending:
self.apply() # self.apply clears the apply pending flag.
try:
if self.parent_conn.poll():
self.do_apply(*self.parent_conn.recv())
return True
except EOFError:
return False
def do_apply(self, original_text, result=[]):
"""Applies the result of parsing if the current text matches the original text."""
buffer = self.text_buffer
start = buffer.get_start_iter()
end = buffer.get_end_iter()
text = buffer.get_slice(start, end, False)
text = self.text_buffer.get_slice(start, end, False)
# Remove tags
buffer.remove_tag(self.italic, start, end)
buffer.remove_tag(self.bold, start, end)
buffer.remove_tag(self.bolditalic, start, end)
buffer.remove_tag(self.strikethrough, start, end)
buffer.remove_tag(self.horizontalrule, start, end)
buffer.remove_tag(self.plaintext, start, end)
buffer.remove_tag(self.table, start, end)
buffer.remove_tag(self.mathtext, start, end)
for tag in self.margins_indents.values():
buffer.remove_tag(tag, start, end)
buffer.remove_tag(self.graytext, start, end)
# Apply markup tags.
if text == original_text and text != self.marked_up_text:
buffer.remove_tag(self.tag_italic, start, end)
buffer.remove_tag(self.tag_bold, start, end)
buffer.remove_tag(self.tag_bold_italic, start, end)
buffer.remove_tag(self.tag_strikethrough, start, end)
buffer.remove_tag(self.tag_center, start, end)
buffer.remove_tag(self.tag_plain_text, start, end)
buffer.remove_tag(self.tag_gray_text, start, end)
buffer.remove_tag(self.tag_math_text, start, end)
buffer.remove_tag(self.tag_wrap_none, start, end)
for tag in self.tags_margins_indents.values():
buffer.remove_tag(tag, start, end)
# Apply "_italic_" tag (italic)
matches = re.finditer(markup_regex.ITALIC, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.italic, start_iter, end_iter)
for tag_name, tag_args, tag_start, tag_end in result:
buffer.apply_tag(
self.tags_markup[tag_name](tag_args),
buffer.get_iter_at_offset(tag_start),
buffer.get_iter_at_offset(tag_end))
# Apply "**bold**" tag (bold)
matches = re.finditer(markup_regex.BOLD, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.bold, start_iter, end_iter)
# Apply "***bolditalic***" tag (bold/italic)
matches = re.finditer(markup_regex.BOLD_ITALIC, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.bolditalic, start_iter, end_iter)
# Apply "~~strikethrough~~" tag (strikethrough)
matches = re.finditer(markup_regex.STRIKETHROUGH, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.strikethrough, start_iter, end_iter)
# Apply "[description](url)" (gray out)
matches = re.finditer(markup_regex.LINK, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start("text") - 1)
end_iter = start_iter.copy()
end_iter.forward_char()
buffer.apply_tag(self.graytext, start_iter, end_iter)
start_iter = buffer.get_iter_at_offset(match.start("url") - 2)
end_iter = buffer.get_iter_at_offset(match.end("url") + 1)
buffer.apply_tag(self.graytext, start_iter, end_iter)
# Apply "---" horizontal rule tag (center)
matches = re.finditer(markup_regex.HORIZONTAL_RULE, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start("symbols"))
end_iter = buffer.get_iter_at_offset(match.end("symbols"))
buffer.apply_tag(self.horizontalrule, start_iter, end_iter)
# Apply "* list" tag (offset)
matches = re.finditer(markup_regex.LIST, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
# Lists use character+space (eg. "* ")
length = 2
nest = len(match.group("indent").replace(" ", "\t"))
margin = -length - 2 * nest
indent = -length - 2 * length * nest
buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter)
# Apply "1. ordered list" tag (offset)
matches = re.finditer(markup_regex.ORDERED_LIST, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
# Numeric lists use numbers/letters+dot/parens+space (eg. "123. ")
length = len(match.group("prefix")) + 1
nest = len(match.group("indent").replace(" ", "\t"))
margin = -length - 2 * nest
indent = -length - 2 * length * nest
buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter)
# Apply "> blockquote" tag (offset)
matches = re.finditer(markup_regex.BLOCK_QUOTE, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.get_margin_indent_tag(2, -2), start_iter, end_iter)
# Apply "#" tag (bold)
matches = re.finditer(markup_regex.HEADER, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
margin = -len(match.group("level")) - 1
buffer.apply_tag(self.get_margin_indent_tag(margin, 0), start_iter, end_iter)
buffer.apply_tag(self.bold, start_iter, end_iter)
# Apply "======" header underline tag (bold)
matches = re.finditer(markup_regex.HEADER_UNDER, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.bold, start_iter, end_iter)
# Apply "```" code tag (offset)
matches = re.finditer(markup_regex.CODE_BLOCK, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start("block"))
end_iter = buffer.get_iter_at_offset(match.end("block"))
buffer.apply_tag(self.get_margin_indent_tag(0, 2), start_iter, end_iter)
buffer.apply_tag(self.plaintext, start_iter, end_iter)
# Apply "---" table tag (wrap/pixels)
matches = re.finditer(markup_regex.TABLE, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.table, start_iter, end_iter)
# Apply "$math$" tag (colorize)
matches = re.finditer(markup_regex.MATH, text)
for match in matches:
start_iter = buffer.get_iter_at_offset(match.start())
end_iter = buffer.get_iter_at_offset(match.end())
buffer.apply_tag(self.mathtext, start_iter, end_iter)
# Apply focus mode tag (grey out before/after current sentence)
# Apply focus mode tag (grey out before/after current sentence).
buffer.remove_tag(self.tag_unfocused_text, start, end)
if self.text_view.focus_mode:
cursor_iter = buffer.get_iter_at_mark(buffer.get_insert())
start_sentence = cursor_iter.copy()
start_sentence.backward_sentence_start()
end_sentence = cursor_iter.copy()
end_sentence.forward_sentence_end()
buffer.apply_tag(self.graytext, start, start_sentence)
buffer.apply_tag(self.graytext, end_sentence, end)
buffer.apply_tag(self.tag_unfocused_text, start, start_sentence)
buffer.apply_tag(self.tag_unfocused_text, end_sentence, end)
# Margin and indent are cumulative. They differ in two ways:
# * Margin is always in the beginning, which means it effectively only affects the first line
@ -254,15 +324,15 @@ class MarkupHandler:
# Indent is always positive, or 0.
def get_margin_indent_tag(self, margin_level, indent_level):
level = (margin_level, indent_level)
if level not in self.margins_indents:
if level not in self.tags_margins_indents:
margin, indent = self.get_margin_indent(margin_level, indent_level)
tag = self.text_buffer.create_tag(
"margin_indent_{}_{}".format(margin_level, indent_level),
left_margin=margin, indent=indent)
self.margins_indents[level] = tag
self.tags_margins_indents[level] = tag
return tag
else:
return self.margins_indents[level]
return self.tags_margins_indents[level]
def get_margin_indent(self, margin_level, indent_level, baseline_margin=None, char_width=None):
if baseline_margin is None:
@ -289,6 +359,9 @@ class MarkupHandler:
self.text_view.set_tabs(tab_array)
# Adjust margins and indents
for level, tag in self.margins_indents.items():
for level, tag in self.tags_margins_indents.items():
margin, indent = self.get_margin_indent(*level, baseline_margin, char_width)
tag.set_properties(left_margin=margin, indent=indent)
def stop(self, *_):
self.parent_conn.close()