Allow lexing null, boolean, number and string
parent
806a78be4d
commit
cb754405b0
|
@ -0,0 +1,85 @@
|
||||||
|
# Values
|
||||||
|
## Null
|
||||||
|
|
||||||
|
The null value is written as `null`.
|
||||||
|
|
||||||
|
## Booleans
|
||||||
|
|
||||||
|
The boolean values are written as `true` and `false`.
|
||||||
|
|
||||||
|
## Numbers
|
||||||
|
|
||||||
|
Integer values can be written as follows (all examples encode the value 127):
|
||||||
|
|
||||||
|
+----------------+--------------------------------------+
|
||||||
|
| Encoding type: | Code: |
|
||||||
|
+----------------+--------------------------------------+
|
||||||
|
| Binary | `0b1111111` or `0B1111111` |
|
||||||
|
+----------------+--------------------------------------+
|
||||||
|
| Octal | `0o0177` or `0O177`. Please use the |
|
||||||
|
| | lower case for better readability. |
|
||||||
|
+----------------+--------------------------------------+
|
||||||
|
| Decimal | `127` |
|
||||||
|
+----------------+--------------------------------------+
|
||||||
|
| Hexadecimal | `0x7f`, `0x7F`, `0X7f` or `0X7F`. |
|
||||||
|
| | Mixing of upper and lowercase is |
|
||||||
|
| | allowed. |
|
||||||
|
+----------------+--------------------------------------+
|
||||||
|
|
||||||
|
## Strings
|
||||||
|
|
||||||
|
A string is written by putting single quotes around it, like `'text'`. Such a string must not contain line breaks or unescaped single quotes. For strings with line breaks, use three single quotes, like `'''text'''`. Be aware that indentation is not stripped: when a line break appears inside the string, the leading whitespace of the following line becomes part of the string.
|
||||||
|
|
||||||
|
The following escape sequences can be used inside a string (for example to include a single quote):
|
||||||
|
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Encoding name: | Encoding inside of the code: |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Backslash (`\`) | `\\` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Single Quote (`'`) | `\'` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Bell | `\a` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Backspace | `\b` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Formfeed | `\f` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| New line | `\n` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Carriage return | `\r` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Horizontal tab | `\t` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Vertical tab | `\v` |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| Octal 8-Bit | `\YYY` |
|
||||||
|
| | |
|
||||||
|
| | `YYY` has to be filled with the |
|
||||||
|
| | octal representation of the char. |
|
||||||
|
| | Allowed are three values between `0-7`. |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| 8-Bit Unicode | `\xYY` or `\XYY` |
|
||||||
|
| | |
|
||||||
|
| | `YY` has to be filled with the |
|
||||||
|
| | hexadecimal representation of the char. |
|
||||||
|
| | Allowed are two values between `0-9`, |
|
||||||
|
| | `a-f` and `A-F`. Mixing lower and upper |
|
||||||
|
| | case is allowed. |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| 16-Bit Unicode | `\uYYYY` |
|
||||||
|
| | |
|
||||||
|
| | `YYYY` has to be filled with the |
|
||||||
|
| | hexadecimal representation of the char. |
|
||||||
|
| | Allowed are four values between `0-9`, |
|
||||||
|
| | `a-f` and `A-F`. Mixing lower and upper |
|
||||||
|
| | case is allowed. |
|
||||||
|
+--------------------+-------------------------------------------+
|
||||||
|
| 32-Bit Unicode | `\UYYYYYYYY` |
|
||||||
|
| | |
|
||||||
|
| | `YYYYYYYY` has to be filled with the |
|
||||||
|
| | hexadecimal representation of the char. |
|
||||||
|
| | Allowed are eight values between `0-9`, |
|
||||||
|
| | `a-f` and `A-F`. Mixing lower and upper |
|
||||||
|
| | case is allowed. |
|
||||||
|
+--------------------+-------------------------------------------+
|
|
@ -0,0 +1,118 @@
|
||||||
|
from ply import lex
|
||||||
|
from ply.lex import LexError
|
||||||
|
import io
|
||||||
|
|
||||||
|
# Definition
|
||||||
|
class LexerDef():
    """Token rules for the looplang lexer, written for PLY's ``lex`` module.

    PLY takes the regular expression of every ``t_*`` function from that
    function's docstring. The rule functions below are therefore documented
    with comments only; their docstrings must stay the bare pattern.
    """

    # Spacing and comments
    t_ignore_COMMENT = r"\#[^\n\r]*"
    t_ignore = " \t"

    def t_NEWLINES(self,token):
        r"\n|\r\n?"
        # One match is exactly one line break (LF, CRLF or a lone CR).
        # Nothing is returned, so PLY discards the match.
        token.lexer.lineno += 1

    # Values rules
    def t_VALUE_NULL(self, token):
        r"null"
        # The literal ``null`` becomes Python's None.
        token.value = None
        return token

    def t_VALUE_BOOL(self, token):
        r"true|false"
        # The pattern only matches "true" or "false", so this is a bool.
        token.value = token.value == "true"
        return token

    # Base of an integer literal, keyed by the lower-cased prefix letter
    # that follows the leading "0".
    _int_bases = {"x": 16, "o": 8, "b": 2}

    def t_VALUE_NUMBER(self, token):
        r"([1-9][0-9]*)|(0([0-9]+|(o|O)[0-7]+|(x|X)[0-9a-fA-F]+|(b|B)[0-1]+)?)"
        # Converts the matched literal to an int. Raises LexError for a
        # decimal literal with a leading zero (the pattern matches it on
        # purpose so that a clear error can be reported).
        text = token.value
        if text.startswith("0") and len(text) > 1:
            marker = text[1]
            if "0" <= marker <= "9":
                raise LexError("Int can't start with a leading zero %s." % (str(token),), token.value)
            base = self._int_bases.get(marker.lower())
            if base is None:
                # Not reachable through the pattern above; kept as a guard.
                raise LexError("Unknown int encoding %s." % (repr(token.value),), token.value)
            token.value = int(text[2:], base)
        else: # Default int
            token.value = int(text)
        return token

    def __int_to_unicode(self, number:int):
        # Returns the one-character string for code point ``number``.
        # Raises LexError for anything that is not a Unicode scalar value,
        # so callers never see a codec error from this helper.
        if number < 0 or number >= 2 ** 32:
            raise LexError("Can't read unicode char greater then 2^32 or below 0.", "")
        if number > 0x10FFFF or 0xD800 <= number <= 0xDFFF:
            raise LexError("Can't read unicode char %s." % (hex(number),), "")
        return chr(number)

    # Single-character escapes: the character after the backslash mapped to
    # the character it stands for.
    _string_replacement = {"\\": "\\",
                           "'": "'",
                           "a": "\x07",
                           "b": "\x08",
                           "f": "\x0c",
                           "n": "\n",
                           "r": "\r",
                           "t": "\t",
                           "v": "\v"}

    # Number of hex digits that follow each hexadecimal escape letter.
    _escape_widths = {"x": 2, "X": 2, "u": 4, "U": 8}
    _octal_digits = frozenset("01234567")
    _hex_digits = frozenset("0123456789abcdefABCDEF")

    def _decode_digits(self, digits, width, allowed, base, kind, token):
        # Validates the digits of a numeric escape and returns its character.
        # The digits are checked explicitly because int() would also accept
        # signs, surrounding whitespace and underscores.
        if len(digits) != width or not allowed.issuperset(digits):
            raise LexError("Can't read %s %s." % (kind, str(token)), token.value)
        return self.__int_to_unicode(int(digits, base))

    def t_VALUE_STRING(self, token):
        r"'''(\\.|[^\\'])*'''|'(\\.|[^\n\\'])*'"
        # Strips the quotes and resolves the escape sequences. Raises
        # LexError for unknown or malformed escapes.
        raw = token.value

        # Keep the line counter in step with t_NEWLINES: every LF, CRLF or
        # lone CR inside the literal is one line break.
        token.lexer.lineno += raw.count("\n") + raw.count("\r") - raw.count("\r\n")

        # Initalize
        quote_len = 3 if raw.startswith("'''") else 1
        body = raw[quote_len:-quote_len]
        pieces = []

        # Make escaping
        pos = 0
        end = len(body)
        while pos < end:
            char = body[pos]
            pos += 1
            if char != "\\":
                pieces.append(char)
                continue
            if pos >= end:
                raise LexError("Can't parse escped string.", token.value) # TODO: Give line number
            code = body[pos]
            pos += 1
            if code in self._string_replacement:
                pieces.append(self._string_replacement[code])
            elif code in self._octal_digits:
                # Octal escapes are three digits; the first one is ``code``.
                digits = body[pos - 1:pos + 2]
                pieces.append(self._decode_digits(digits, 3, self._octal_digits, 8, "octal", token))
                pos += 2
            elif code in self._escape_widths:
                width = self._escape_widths[code]
                digits = body[pos:pos + width]
                pieces.append(self._decode_digits(digits, width, self._hex_digits, 16, "hex", token))
                pos += width
            else:
                raise LexError("Can't parse escped string %s." % (token,), token.value) # TODO: Give line number

        # Output result string
        token.value = "".join(pieces)
        return token

    # Build lexer
    def __init__(self):
        # The token names are derived from the attribute names: every
        # attribute starting with "t_" contributes its name without prefix.
        self.tokens = tuple(name[2:] for name in dir(self) if name.startswith("t_"))

    def t_error(self, token):
        # Called by PLY when no rule matches; always raises LexError.
        raise LexError("Can't lex %s." % (str(token),), token.value)

    def build(self):
        # Returns a PLY lexer that uses this instance as its rule module.
        return lex.lex(module=self)
|
||||||
|
|
||||||
|
# Gen lexer
|
||||||
|
def gen_lexer():
    """Return a new lexer built from a fresh ``LexerDef`` instance."""
    definition = LexerDef()
    return definition.build()
|
|
@ -0,0 +1,23 @@
|
||||||
|
import os
|
||||||
|
from unittest import main
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
# Info for big tests
|
||||||
|
if not utils.RUN_BIG_TESTS:
    # Printed once, when this package is imported, so that a run without the
    # big tests says how to enable them.
    print("Skip big tests. To run them set environment variable RUN_BIG_TESTS.")
|
||||||
|
|
||||||
|
|
||||||
|
# Load sub packages
|
||||||
|
def _load_subpackages(path, package):
|
||||||
|
for module in filter(lambda x: not x.startswith("_"), os.listdir(path)):
|
||||||
|
module_path = os.path.join(path, module)
|
||||||
|
if os.path.isfile(module_path) and module.endswith(".py") and "." not in module[:-3]:
|
||||||
|
for iID, i in filter(lambda x: not x[0].startswith("_"), __import__("%s.%s" % (package, module[:-3]), fromlist=(package,)).__dict__.items()):
|
||||||
|
globals()[iID] = i
|
||||||
|
elif os.path.isdir(module_path) and "." not in module:
|
||||||
|
package_name = "%s.%s" % (package, module)
|
||||||
|
for iID, i in __import__(package_name, fromlist=(package,)).__dict__.items():
|
||||||
|
globals()[iID] = i
|
||||||
|
_load_subpackages(module_path, package_name)
|
||||||
|
# Runs at import time: scan the directory that contains this file and pull
# the public names of everything found there into this module's namespace.
_load_subpackages(os.path.split(__file__)[0], "looplang.test")
|
|
@ -0,0 +1,82 @@
|
||||||
|
import unittest
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
class TestValueNumber(unittest.TestCase):
    """Lexer tests for integer literals in decimal, binary, octal and hex."""

    __test_ints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 123, 1234, 12345, 123456, 1234567, 12345678, 123456789, 1234567890]

    def __assert_single_number(self, code, expected):
        # ``code`` must lex to exactly one VALUE_NUMBER token with the
        # value ``expected``.
        result = utils.list_tokens(code)
        self.assertEqual(len(result), 1, code)
        self.assertEqual(result[0].type, "VALUE_NUMBER", code)
        self.assertEqual(result[0].value, expected, code)

    def __assert_all_ints(self, render):
        # Checks every sample integer; ``render`` turns it into source text.
        for i in self.__test_ints:
            self.__assert_single_number(render(i), i)

    def test_numbers(self):
        result = utils.list_tokens(" ".join(map(str, range(0, 101))) + " 1000000000000000000000")
        self.assertEqual(len(result), 102)
        for original, i in zip(range(0, 101), result[:-1]):
            self.assertEqual(i.type, "VALUE_NUMBER")
            self.assertEqual(i.value, original)
        self.assertEqual(result[-1].type, "VALUE_NUMBER")
        self.assertEqual(result[-1].value, 1000000000000000000000)

    def test_all_number_chars(self):
        self.__assert_single_number("1234567890123456789", 1234567890123456789)

    def test_no_leading_zero(self):
        # Test zero
        self.__assert_single_number("0", 0)

        # Test leading zero
        with self.assertRaises(utils.LexError) as context:
            utils.list_tokens("0123456789")
        # The previous assertTrue() on a non-empty string could never fail;
        # check the text of the raised error instead.
        self.assertIn("leading zero", str(context.exception))

    def test_dez(self):
        self.__assert_all_ints(str)

    def test_bin_lower(self):
        self.__assert_all_ints(lambda i: bin(i).lower())

    def test_bin_upper(self):
        self.__assert_all_ints(lambda i: bin(i).upper())

    def test_octal_lower(self):
        self.__assert_all_ints(lambda i: oct(i).lower())

    def test_octal_upper(self):
        self.__assert_all_ints(lambda i: oct(i).upper())

    def test_hex_lower(self):
        self.__assert_all_ints(lambda i: hex(i).lower())

    def test_hex_upper(self):
        self.__assert_all_ints(lambda i: hex(i).upper())
|
|
@ -0,0 +1,24 @@
|
||||||
|
import unittest
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
class TestValueNull(unittest.TestCase):
    """Lexer tests for the ``null`` literal."""

    def test_null(self):
        # "null" must produce exactly one VALUE_NULL token carrying None.
        tokens = utils.list_tokens("null")
        self.assertEqual(len(tokens), 1)
        only = tokens[0]
        self.assertEqual(only.type, "VALUE_NULL")
        self.assertIsNone(only.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestValueBool(unittest.TestCase):
    """Lexer tests for the ``true`` and ``false`` literals."""

    def __check_literal(self, code, expected):
        # ``code`` must produce exactly one VALUE_BOOL token equal to
        # ``expected``.
        tokens = utils.list_tokens(code)
        self.assertEqual(len(tokens), 1)
        only = tokens[0]
        self.assertEqual(only.type, "VALUE_BOOL")
        self.assertEqual(only.value, expected)

    def test_true(self):
        self.__check_literal("true", True)

    def test_false(self):
        self.__check_literal("false", False)
|
|
@ -0,0 +1,339 @@
|
||||||
|
import unittest
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
class TestValueStringSingle(unittest.TestCase):
    """Lexer tests for string literals in both quoting styles (' and ''')."""

    def __assert_string(self, code, expected):
        # ``code`` must lex to exactly one VALUE_STRING token with the value
        # ``expected``. The source text is passed as failure message.
        result = utils.list_tokens(code)
        self.assertEqual(len(result), 1, code)
        self.assertEqual(result[0].type, "VALUE_STRING", code)
        self.assertEqual(result[0].value, expected, code)

    def __assert_numeric_escapes(self, quote, prefix, digit_format, codes):
        # For every code point in ``codes`` builds the literal
        # <quote>\<prefix><digits><quote> and expects the single character
        # with that code point. ``digit_format`` is a format() spec such as
        # "03o" or "02X" that fixes base, width and digit case.
        for i in codes:
            code = "%s\\%s%s%s" % (quote, prefix, format(i, digit_format), quote)
            self.__assert_string(code, chr(i))

    def test_empty(self):
        self.__assert_string(" '' ", '')
        self.__assert_string(" '''''' ", '')

    def test_content(self):
        self.__assert_string(" 'abc def\\t1234' ", 'abc def\t1234')
        self.__assert_string(" '''abc def\\t1234''' ", 'abc def\t1234')

    def test_no_newline(self):
        with self.assertRaises(utils.LexError):
            utils.list_tokens(" 'a\nb' ")

        self.__assert_string(" '''a\nb''' ", 'a\nb')

    def test_valid_octals(self):
        self.__assert_numeric_escapes("'", "", "03o", range(256))

    def test_valid_octals_multiline(self):
        self.__assert_numeric_escapes("'''", "", "03o", range(256))

    # The hex tests cover 0..255: a two-digit escape cannot express more.
    def test_valid_hex_lower(self):
        for prefix in ("x", "X"):
            self.__assert_numeric_escapes("'", prefix, "02x", range(256))

    def test_valid_hex_lower_multiline(self):
        for prefix in ("x", "X"):
            self.__assert_numeric_escapes("'''", prefix, "02x", range(256))

    def test_valid_hex_upper(self):
        for prefix in ("x", "X"):
            self.__assert_numeric_escapes("'", prefix, "02X", range(256))

    def test_valid_hex_upper_multiline(self):
        for prefix in ("x", "X"):
            self.__assert_numeric_escapes("'''", prefix, "02X", range(256))

    def __filter_codes(self, source):
        # Drops the surrogate range 0xD800-0xDFFF from ``source``.
        return filter(lambda value: not 0xd800 <= value < 0xe000, source)

    @utils.big_test
    def test_valid_small_unicode(self):
        # Lower-case digits first, then upper-case digits.
        for digit_format in ("04x", "04X"):
            self.__assert_numeric_escapes("'", "u", digit_format, self.__filter_codes(range(2 ** 16)))

    @utils.big_test
    def test_valid_small_unicode_multiline(self):
        for digit_format in ("04x", "04X"):
            self.__assert_numeric_escapes("'''", "u", digit_format, self.__filter_codes(range(2 ** 16)))

    @utils.big_test
    def test_valid_big_unicode(self):
        for digit_format in ("08x", "08X"):
            self.__assert_numeric_escapes("'", "U", digit_format, self.__filter_codes(range(0x00110000)))

    @utils.big_test
    def test_valid_big_unicode_multiline(self):
        for digit_format in ("08x", "08X"):
            self.__assert_numeric_escapes("'''", "U", digit_format, self.__filter_codes(range(0x00110000)))

    # Escape text after the backslash mapped to the expected result. The
    # octal values above 0o377 are spelled with \u escapes because Python
    # deprecates octal escapes of that size in its own string literals.
    _special_chars = {"\\": "\\",
                      "'": "'",
                      "a": "\x07",
                      "b": "\x08",
                      "f": "\x0c",
                      "n": "\n",
                      "r": "\r",
                      "t": "\t",
                      "v": "\v",
                      "000": "\000",
                      "100": "\100",
                      "200": "\200",
                      "300": "\300",
                      "400": "\u0100",
                      "500": "\u0140",
                      "600": "\u0180",
                      "700": "\u01c0",
                      "x00": "\x00",
                      "X00": "\x00",
                      "u0000": "\x00",
                      "U00000000": "\x00"}

    def test_special_values(self):
        for code, code_result in self._special_chars.items():
            self.__assert_string(" '\\%s' " % code, code_result)
            self.__assert_string(" '''\\%s''' " % code, code_result)

    def test_not_special_values(self):
        # Every ASCII character that does not start a known escape must be
        # rejected after a backslash.
        ignore = {key[0] for key in self._special_chars}
        all_chars = {chr(x) for x in range(128)}
        for code in sorted(all_chars - ignore):
            with self.assertRaises(utils.LexError):
                utils.list_tokens(" '\\%s' " % (code,))
            with self.assertRaises(utils.LexError):
                utils.list_tokens(" '''\\%s''' " % code)
|
|
@ -0,0 +1,17 @@
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
from looplang.lexer import gen_lexer, LexError
|
||||||
|
|
||||||
|
|
||||||
|
def list_tokens(code:str):
    """Lex *code* with a freshly generated lexer and return all tokens as a list."""
    lexer = gen_lexer()
    lexer.input(code)
    return [token for token in lexer]
|
||||||
|
|
||||||
|
|
||||||
|
# True when the environment variable RUN_BIG_TESTS is set (to any value).
RUN_BIG_TESTS = os.environ.get("RUN_BIG_TESTS", None) is not None

def big_test(func):
    """Decorator for expensive tests that only run when RUN_BIG_TESTS is set.

    :param func: The test function to decorate.
    :return: *func* unchanged when big tests are enabled, otherwise a
        version of it that unittest reports as skipped.
    """
    if RUN_BIG_TESTS:
        return func
    # unittest.skip() takes the reason and returns the actual decorator.
    return unittest.skip("Big test; set the environment variable RUN_BIG_TESTS to run it.")(func)
|
2
setup.py
2
setup.py
|
@ -12,6 +12,6 @@ setup(name="looplang",
|
||||||
author="Marko Semet",
|
author="Marko Semet",
|
||||||
author_email="marko@marko10-000.de",
|
author_email="marko@marko10-000.de",
|
||||||
url="https://marko10-000.de/project/looplang",
|
url="https://marko10-000.de/project/looplang",
|
||||||
packages=find_packages("looplang"),
|
packages=find_packages(),
|
||||||
install_requires=["ply>=3.0<4"]
|
install_requires=["ply>=3.0<4"]
|
||||||
)
|
)
|
|
@ -0,0 +1,4 @@
|
||||||
|
#! /usr/bin/env bash

# Run the looplang unit tests through venv.sh.
# The tests only start if changing into the script's directory succeeded.
script_dir="$(dirname "$0")"
cd "$script_dir" &&
./venv.sh python3 -m unittest looplang.test
|
6
venv.sh
6
venv.sh
|
@ -3,11 +3,11 @@
|
||||||
cd "$(dirname "$0")"
|
cd "$(dirname "$0")"
|
||||||
python3 -m venv venv &&
|
python3 -m venv venv &&
|
||||||
source venv/bin/activate &&
|
source venv/bin/activate &&
|
||||||
pip install --upgrade pip &&
|
pip install pip &&
|
||||||
pip install --upgrade . &&
|
pip install . &&
|
||||||
if [ "$#" -gt 1 ]
|
if [ "$#" -gt 1 ]
|
||||||
then
|
then
|
||||||
exec "$@"
|
exec -- "$@"
|
||||||
else
|
else
|
||||||
exec "$SHELL"
|
exec "$SHELL"
|
||||||
fi
|
fi
|
Loading…
Reference in New Issue