From 931d92bdfdb683b0ed7d6a0652de3244ceadd916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Silva?= Date: Sun, 9 Jun 2019 03:29:09 +0100 Subject: [PATCH] Move markup to a worker process A worker thread works in practice, but the GIL takes a significant toll as the code is computationally heavy. The result is a hogged UI, specially when other threads are involved (eg. stats counter). A worker process is faster, hogs the UI significantly less, at the cost of slightly higher memory usage. As side-effects: - UberWriter can now comfortably handle much larger documents - Text selection is much more responsive --- uberwriter/text_view.py | 4 +- uberwriter/text_view_markup_handler.py | 447 ++++++++++++++----------- 2 files changed, 262 insertions(+), 189 deletions(-) diff --git a/uberwriter/text_view.py b/uberwriter/text_view.py index 22baa2b..5f8ea15 100644 --- a/uberwriter/text_view.py +++ b/uberwriter/text_view.py @@ -1,12 +1,11 @@ import gi -from gi.repository.GObject import SignalMatchType from uberwriter.inline_preview import InlinePreview +from uberwriter.text_view_drag_drop_handler import DragDropHandler, TARGET_URI, TARGET_TEXT from uberwriter.text_view_format_inserter import FormatInserter from uberwriter.text_view_markup_handler import MarkupHandler from uberwriter.text_view_scroller import TextViewScroller from uberwriter.text_view_undo_redo_handler import UndoRedoHandler -from uberwriter.text_view_drag_drop_handler import DragDropHandler, TARGET_URI, TARGET_TEXT gi.require_version('Gtk', '3.0') gi.require_version('Gspell', '1') @@ -88,6 +87,7 @@ class TextView(Gtk.TextView): # Markup self.markup = MarkupHandler(self) self.connect('style-updated', self.markup.on_style_updated) + self.connect('destroy', self.markup.stop) # Preview popover self.preview_popover = InlinePreview(self) diff --git a/uberwriter/text_view_markup_handler.py b/uberwriter/text_view_markup_handler.py index 97798fe..04da155 100644 --- a/uberwriter/text_view_markup_handler.py +++ b/uberwriter/text_view_markup_handler.py @@ -15,237 +15,307 @@ ### END LICENSE import re +from multiprocessing import Pipe, Process import gi -from gi.overrides import GLib from uberwriter import helpers, markup_regex +from uberwriter.markup_regex import STRIKETHROUGH, BOLD_ITALIC, BOLD, ITALIC, IMAGE, LINK, \ + HORIZONTAL_RULE, LIST, ORDERED_LIST, BLOCK_QUOTE, HEADER, HEADER_UNDER, TABLE, MATH gi.require_version('Gtk', '3.0') -from gi.repository import Gtk +from gi.repository import Gtk, GLib from gi.repository import Pango class MarkupHandler: - # Maximum number of characters for which to markup synchronously. - max_char_sync = 100000 + TAG_NAME_ITALIC = 'italic' + TAG_NAME_BOLD = 'bold' + TAG_NAME_BOLD_ITALIC = 'bold_italic' + TAG_NAME_STRIKETHROUGH = 'strikethrough' + TAG_NAME_CENTER = 'center' + TAG_NAME_WRAP_NONE = 'wrap_none' + TAG_NAME_PLAIN_TEXT = 'plain_text' + TAG_NAME_GRAY_TEXT = 'gray_text' + TAG_NAME_MATH_TEXT = 'math_text' + TAG_NAME_UNFOCUSED_TEXT = 'unfocused_text' + TAG_NAME_MARGIN_INDENT = 'margin_indent' def __init__(self, text_view): self.text_view = text_view self.text_buffer = text_view.get_buffer() + self.marked_up_text = None - # Styles + # Tags. buffer = self.text_buffer - self.italic = buffer.create_tag('italic', - weight=Pango.Weight.NORMAL, - style=Pango.Style.ITALIC) - - self.bold = buffer.create_tag('bold', - weight=Pango.Weight.BOLD, - style=Pango.Style.NORMAL) - - self.bolditalic = buffer.create_tag('bolditalic', - weight=Pango.Weight.BOLD, + self.tag_italic = buffer.create_tag(self.TAG_NAME_ITALIC, + weight=Pango.Weight.NORMAL, style=Pango.Style.ITALIC) - self.strikethrough = buffer.create_tag('strikethrough', strikethrough=True) - - self.horizontalrule = buffer.create_tag('centertext', - justification=Gtk.Justification.CENTER) - - self.plaintext = buffer.create_tag('plaintext', - weight=Pango.Weight.NORMAL, - style=Pango.Style.NORMAL, - strikethrough=False, - justification=Gtk.Justification.LEFT) - - self.table = buffer.create_tag('table', - wrap_mode=Gtk.WrapMode.NONE, - pixels_above_lines=0, - pixels_below_lines=0) - - self.mathtext = buffer.create_tag('mathtext', - weight=Pango.Weight.NORMAL, - style=Pango.Style.NORMAL, - strikethrough=False) - - self.graytext = buffer.create_tag('graytext', - foreground='gray', - weight=Pango.Weight.NORMAL, + self.tag_bold = buffer.create_tag(self.TAG_NAME_BOLD, + weight=Pango.Weight.BOLD, style=Pango.Style.NORMAL) - # Margin and indents - # A baseline margin is set to allow negative offsets for formatting headers, lists, etc - self.margins_indents = {} + self.tag_bold_italic = buffer.create_tag(self.TAG_NAME_BOLD_ITALIC, + weight=Pango.Weight.BOLD, + style=Pango.Style.ITALIC) + + self.tag_strikethrough = buffer.create_tag(self.TAG_NAME_STRIKETHROUGH, + strikethrough=True) + + self.tag_center = buffer.create_tag(self.TAG_NAME_CENTER, + justification=Gtk.Justification.CENTER) + + self.tag_wrap_none = buffer.create_tag(self.TAG_NAME_WRAP_NONE, + wrap_mode=Gtk.WrapMode.NONE, + pixels_above_lines=0, + pixels_below_lines=0) + + self.tag_plain_text = buffer.create_tag(self.TAG_NAME_PLAIN_TEXT, + weight=Pango.Weight.NORMAL, + style=Pango.Style.NORMAL, + strikethrough=False, + justification=Gtk.Justification.LEFT) + + self.tag_gray_text = buffer.create_tag(self.TAG_NAME_GRAY_TEXT, + foreground='gray', + weight=Pango.Weight.NORMAL, + style=Pango.Style.NORMAL) + + self.tag_math_text = buffer.create_tag(self.TAG_NAME_MATH_TEXT, + weight=Pango.Weight.NORMAL, + style=Pango.Style.NORMAL, + strikethrough=False) + + self.tags_markup = { + self.TAG_NAME_ITALIC: lambda args: self.tag_italic, + self.TAG_NAME_BOLD: lambda args: self.tag_bold, + self.TAG_NAME_BOLD_ITALIC: lambda args: self.tag_bold_italic, + self.TAG_NAME_STRIKETHROUGH: lambda args: self.tag_strikethrough, + self.TAG_NAME_CENTER: lambda args: self.tag_center, + self.TAG_NAME_WRAP_NONE: lambda args: self.tag_wrap_none, + self.TAG_NAME_PLAIN_TEXT: lambda args: self.tag_plain_text, + self.TAG_NAME_GRAY_TEXT: lambda args: self.tag_gray_text, + self.TAG_NAME_MATH_TEXT: lambda args: self.tag_math_text, + self.TAG_NAME_MARGIN_INDENT: lambda args: self.get_margin_indent_tag(*args) + } + + # Focus mode. + self.tag_unfocused_text = buffer.create_tag(self.TAG_NAME_UNFOCUSED_TEXT, + foreground='gray', + weight=Pango.Weight.NORMAL, + style=Pango.Style.NORMAL) + + # Margin and indents. + # A baseline margin is set to allow negative offsets for formatting headers, lists, etc. + self.tags_margins_indents = {} self.baseline_margin = 0 self.char_width = 0 self.update_margins_indents() - # Style + # Style. self.on_style_updated() - self.version = 0 + # Worker process to handle parsing. + self.parsing = False + self.apply_pending = False + self.parent_conn, child_conn = Pipe() + Process(target=self.parse, args=(child_conn,), daemon=True).start() + GLib.io_add_watch( + self.parent_conn.fileno(), GLib.PRIORITY_DEFAULT, GLib.IO_IN, self.on_parsed) def on_style_updated(self, *_): (found, color) = self.text_view.get_style_context().lookup_color('math_text_color') if not found: (_, color) = self.text_view.get_style_context().lookup_color('foreground_color') - self.mathtext.set_property("foreground", color.to_string()) + self.tag_math_text.set_property("foreground", color.to_string()) def apply(self): - self.version = self.version + 1 - if self.text_buffer.get_char_count() < self.max_char_sync: - self.do_apply() - else: - GLib.idle_add(self.do_apply, self.version) + """Applies markup, parsing it in a worker process if the text has changed. - def do_apply(self, version=None): - if version is not None and version != self.version: - return + In case parsing is already running, it will re-apply once it finishes. This ensure that + the pipe doesn't fill (and block) if multiple requests are made in quick succession.""" + + if not self.parsing: + self.parsing = True + self.apply_pending = False + + text = self.text_buffer.get_slice( + self.text_buffer.get_start_iter(), self.text_buffer.get_end_iter(), False) + if text != self.marked_up_text: + self.parent_conn.send(text) + else: + self.do_apply(text) + else: + self.apply_pending = True + + def parse(self, child_conn): + """Parses markup in a worker process.""" + + while True: + while True: + try: + text = child_conn.recv() + if not child_conn.poll(): + break + except EOFError: + child_conn.close() + return + + # List of tuples in the form (tag_name, tag_args, tag_start, tag_end). + result = [] + + # Find: + # - "_italic_" (italic) + # - "**bold**" (bold) + # - "***bolditalic***" (bold/italic) + # - "~~strikethrough~~" (strikethrough) + # - "$math$" (colorize) + # - "---" table (wrap/pixels) + regexps = ( + (ITALIC, self.TAG_NAME_ITALIC), + (BOLD, self.TAG_NAME_BOLD), + (BOLD_ITALIC, self.TAG_NAME_BOLD_ITALIC), + (STRIKETHROUGH, self.TAG_NAME_STRIKETHROUGH), + (MATH, self.TAG_NAME_MATH_TEXT), + (TABLE, self.TAG_NAME_WRAP_NONE) + ) + for regexp, tag_name in regexps: + matches = re.finditer(regexp, text) + for match in matches: + result.append((tag_name, (), match.start(), match.end())) + + # Find: + # - "[description](url)" (gray out) + # - "![description](image_url)" (gray out) + regexps = ( + (LINK, self.TAG_NAME_GRAY_TEXT), + (IMAGE, self.TAG_NAME_GRAY_TEXT) + ) + for regexp, tag_name in regexps: + matches = re.finditer(regexp, text) + for match in matches: + result.append((tag_name, (), match.start(), match.start("text"))) + result.append((tag_name, (), match.end("text"), match.end())) + + # Find "---" horizontal rule (center). + matches = re.finditer(HORIZONTAL_RULE, text) + for match in matches: + result.append(( + self.TAG_NAME_CENTER, (), match.start("symbols"), match.end("symbols"))) + + # Find "* list" (offset). + matches = re.finditer(LIST, text) + for match in matches: + # Lists use character+space (eg. "* "). + length = 2 + nest = len(match.group("indent").replace(" ", "\t")) + margin = -length - 2 * nest + indent = -length - 2 * length * nest + result.append(( + self.TAG_NAME_MARGIN_INDENT, + (margin, indent), + match.start("content"), + match.end("content") + )) + + # Find "1. ordered list" (offset). + matches = re.finditer(ORDERED_LIST, text) + for match in matches: + # Ordered lists use numbers/letters+dot/parens+space (eg. "123. "). + length = len(match.group("prefix")) + 1 + nest = len(match.group("indent").replace(" ", "\t")) + margin = -length - 2 * nest + indent = -length - 2 * length * nest + result.append(( + self.TAG_NAME_MARGIN_INDENT, + (margin, indent), + match.start("content"), + match.end("content") + )) + + # Find "> blockquote" (offset). + matches = re.finditer(BLOCK_QUOTE, text) + for match in matches: + result.append((self.TAG_NAME_MARGIN_INDENT, (2, -2), match.start(), match.end())) + + # Find "# Header" (offset+bold). + matches = re.finditer(HEADER, text) + for match in matches: + margin = -len(match.group("level")) - 1 + result.append( + (self.TAG_NAME_MARGIN_INDENT, (margin, 0), match.start(), match.end())) + result.append((self.TAG_NAME_BOLD, (), match.start(), match.end())) + + # Find "=======" header underline (bold). + matches = re.finditer(HEADER_UNDER, text) + for match in matches: + result.append((self.TAG_NAME_BOLD, (), match.start(), match.end())) + + # Find "```" code tag (offset+remove other markup). + matches = re.finditer(markup_regex.CODE_BLOCK, text) + for match in matches: + result.append(( + self.TAG_NAME_MARGIN_INDENT, (0, 2), match.start("block"), match.end("block"))) + result.append(( + self.TAG_NAME_PLAIN_TEXT, (), match.start("block"), match.end("block"))) + + # Send parsed data back. + child_conn.send((text, result)) + + def on_parsed(self, _source, _condition): + """Reads the parsing result from the pipe and triggers any pending apply.""" + + self.parsing = False + if self.apply_pending: + self.apply() # self.apply clears the apply pending flag. + + try: + if self.parent_conn.poll(): + self.do_apply(*self.parent_conn.recv()) + return True + except EOFError: + return False + + def do_apply(self, original_text, result=[]): + """Applies the result of parsing if the current text matches the original text.""" buffer = self.text_buffer start = buffer.get_start_iter() end = buffer.get_end_iter() - text = buffer.get_slice(start, end, False) + text = self.text_buffer.get_slice(start, end, False) - # Remove tags - buffer.remove_tag(self.italic, start, end) - buffer.remove_tag(self.bold, start, end) - buffer.remove_tag(self.bolditalic, start, end) - buffer.remove_tag(self.strikethrough, start, end) - buffer.remove_tag(self.horizontalrule, start, end) - buffer.remove_tag(self.plaintext, start, end) - buffer.remove_tag(self.table, start, end) - buffer.remove_tag(self.mathtext, start, end) - for tag in self.margins_indents.values(): - buffer.remove_tag(tag, start, end) - buffer.remove_tag(self.graytext, start, end) + # Apply markup tags. + if text == original_text and text != self.marked_up_text: + buffer.remove_tag(self.tag_italic, start, end) + buffer.remove_tag(self.tag_bold, start, end) + buffer.remove_tag(self.tag_bold_italic, start, end) + buffer.remove_tag(self.tag_strikethrough, start, end) + buffer.remove_tag(self.tag_center, start, end) + buffer.remove_tag(self.tag_plain_text, start, end) + buffer.remove_tag(self.tag_gray_text, start, end) + buffer.remove_tag(self.tag_math_text, start, end) + buffer.remove_tag(self.tag_wrap_none, start, end) + for tag in self.tags_margins_indents.values(): + buffer.remove_tag(tag, start, end) - # Apply "_italic_" tag (italic) - matches = re.finditer(markup_regex.ITALIC, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.italic, start_iter, end_iter) + for tag_name, tag_args, tag_start, tag_end in result: + buffer.apply_tag( + self.tags_markup[tag_name](tag_args), + buffer.get_iter_at_offset(tag_start), + buffer.get_iter_at_offset(tag_end)) - # Apply "**bold**" tag (bold) - matches = re.finditer(markup_regex.BOLD, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.bold, start_iter, end_iter) - - # Apply "***bolditalic***" tag (bold/italic) - matches = re.finditer(markup_regex.BOLD_ITALIC, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.bolditalic, start_iter, end_iter) - - # Apply "~~strikethrough~~" tag (strikethrough) - matches = re.finditer(markup_regex.STRIKETHROUGH, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.strikethrough, start_iter, end_iter) - - # Apply "[description](url)" (gray out) - matches = re.finditer(markup_regex.LINK, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start("text") - 1) - end_iter = start_iter.copy() - end_iter.forward_char() - buffer.apply_tag(self.graytext, start_iter, end_iter) - start_iter = buffer.get_iter_at_offset(match.start("url") - 2) - end_iter = buffer.get_iter_at_offset(match.end("url") + 1) - buffer.apply_tag(self.graytext, start_iter, end_iter) - - # Apply "---" horizontal rule tag (center) - matches = re.finditer(markup_regex.HORIZONTAL_RULE, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start("symbols")) - end_iter = buffer.get_iter_at_offset(match.end("symbols")) - buffer.apply_tag(self.horizontalrule, start_iter, end_iter) - - # Apply "* list" tag (offset) - matches = re.finditer(markup_regex.LIST, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - # Lists use character+space (eg. "* ") - length = 2 - nest = len(match.group("indent").replace(" ", "\t")) - margin = -length - 2 * nest - indent = -length - 2 * length * nest - buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter) - - # Apply "1. ordered list" tag (offset) - matches = re.finditer(markup_regex.ORDERED_LIST, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - # Numeric lists use numbers/letters+dot/parens+space (eg. "123. ") - length = len(match.group("prefix")) + 1 - nest = len(match.group("indent").replace(" ", "\t")) - margin = -length - 2 * nest - indent = -length - 2 * length * nest - buffer.apply_tag(self.get_margin_indent_tag(margin, indent), start_iter, end_iter) - - # Apply "> blockquote" tag (offset) - matches = re.finditer(markup_regex.BLOCK_QUOTE, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.get_margin_indent_tag(2, -2), start_iter, end_iter) - - # Apply "#" tag (bold) - matches = re.finditer(markup_regex.HEADER, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - margin = -len(match.group("level")) - 1 - buffer.apply_tag(self.get_margin_indent_tag(margin, 0), start_iter, end_iter) - buffer.apply_tag(self.bold, start_iter, end_iter) - - # Apply "======" header underline tag (bold) - matches = re.finditer(markup_regex.HEADER_UNDER, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.bold, start_iter, end_iter) - - # Apply "```" code tag (offset) - matches = re.finditer(markup_regex.CODE_BLOCK, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start("block")) - end_iter = buffer.get_iter_at_offset(match.end("block")) - buffer.apply_tag(self.get_margin_indent_tag(0, 2), start_iter, end_iter) - buffer.apply_tag(self.plaintext, start_iter, end_iter) - - # Apply "---" table tag (wrap/pixels) - matches = re.finditer(markup_regex.TABLE, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.table, start_iter, end_iter) - - # Apply "$math$" tag (colorize) - matches = re.finditer(markup_regex.MATH, text) - for match in matches: - start_iter = buffer.get_iter_at_offset(match.start()) - end_iter = buffer.get_iter_at_offset(match.end()) - buffer.apply_tag(self.mathtext, start_iter, end_iter) - - # Apply focus mode tag (grey out before/after current sentence) + # Apply focus mode tag (grey out before/after current sentence). + buffer.remove_tag(self.tag_unfocused_text, start, end) if self.text_view.focus_mode: cursor_iter = buffer.get_iter_at_mark(buffer.get_insert()) start_sentence = cursor_iter.copy() start_sentence.backward_sentence_start() end_sentence = cursor_iter.copy() end_sentence.forward_sentence_end() - buffer.apply_tag(self.graytext, start, start_sentence) - buffer.apply_tag(self.graytext, end_sentence, end) + buffer.apply_tag(self.tag_unfocused_text, start, start_sentence) + buffer.apply_tag(self.tag_unfocused_text, end_sentence, end) # Margin and indent are cumulative. They differ in two ways: # * Margin is always in the beginning, which means it effectively only affects the first line @@ -254,15 +324,15 @@ class MarkupHandler: # Indent is always positive, or 0. def get_margin_indent_tag(self, margin_level, indent_level): level = (margin_level, indent_level) - if level not in self.margins_indents: + if level not in self.tags_margins_indents: margin, indent = self.get_margin_indent(margin_level, indent_level) tag = self.text_buffer.create_tag( "margin_indent_{}_{}".format(margin_level, indent_level), left_margin=margin, indent=indent) - self.margins_indents[level] = tag + self.tags_margins_indents[level] = tag return tag else: - return self.margins_indents[level] + return self.tags_margins_indents[level] def get_margin_indent(self, margin_level, indent_level, baseline_margin=None, char_width=None): if baseline_margin is None: @@ -289,6 +359,9 @@ class MarkupHandler: self.text_view.set_tabs(tab_array) # Adjust margins and indents - for level, tag in self.margins_indents.items(): + for level, tag in self.tags_margins_indents.items(): margin, indent = self.get_margin_indent(*level, baseline_margin, char_width) tag.set_properties(left_margin=margin, indent=indent) + + def stop(self, *_): + self.parent_conn.close()