Fix character count with horizontal rules

Pandoc's plain-text conversion renders each horizontal rule as a line of 72 dashes. This update makes the character count ignore any dash that is immediately followed by another dash or by a newline, so a horizontal rule no longer adds 72 characters to the count.
2019-05-01 18:55:24 +01:00 · 2019-05-01 18:55:24 +01:00 · 241ba567e4
parent 9238a82d4d
commit 241ba567e4
1 changed file with 5 additions and 2 deletions
--- a/uberwriter/stats_counter.py
+++ b/uberwriter/stats_counter.py
@@ -11,8 +11,11 @@ from uberwriter import helpers
 class StatsCounter:
    """Counts characters, words, sentences and read time using a background thread."""

-    # Regexp that matches any character, except for newlines and subsequent spaces.
-    CHARACTERS = re.compile(r"[^\s]|(?:[^\S\n](?!\s))")
+    # Regexp that matches characters, with the following exceptions:
+    # * Newlines
+    # * Sequential spaces
+    # * Sequential dashes
+    CHARACTERS = re.compile(r"[^\s-]|(?:[^\S\n](?!\s)|-(?![-\n]))")

    # Regexp that matches Asian letters, general symbols and hieroglyphs,
    # as well as sequences of word characters optionally containing non-word characters in-between.