-- Copyright 2006-2019 Mitchell mitchell.att.foicica.com. See License.txt.
-- Python LPeg lexer.

local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
-- NOTE(review): `lpeg` is used as a global here (no require) — presumably it
-- is injected by the embedding application (Scintillua host); confirm.
local P, R, S = lpeg.P, lpeg.R, lpeg.S

-- Folding is driven purely by indentation, matching Python's block structure.
local lex = lexer.new('python', {fold_by_indentation = true})

-- Whitespace.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))

-- Keywords and well-known dunder names. Includes Python 3 `async`/`await`;
-- duplicate entries (`__nonzero__`, and the repeated `__doc__`/`__import__`/
-- `__name__` in the old "Other" section) have been removed.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
  and as assert async await break class continue def del elif else except exec
  finally for from global if import in is lambda nonlocal not or pass print
  raise return try while with yield
  -- Descriptors/attr access.
  __get__ __set__ __delete__ __slots__
  -- Class.
  __new__ __init__ __del__ __repr__ __str__ __cmp__ __index__ __lt__ __le__
  __gt__ __ge__ __eq__ __ne__ __hash__ __nonzero__ __getattr__
  __getattribute__ __setattr__ __delattr__ __call__
  -- Operator.
  __add__ __sub__ __mul__ __div__ __floordiv__ __mod__ __divmod__ __pow__
  __and__ __xor__ __or__ __lshift__ __rshift__ __neg__ __pos__ __abs__
  __invert__ __iadd__ __isub__ __imul__ __idiv__ __ifloordiv__ __imod__
  __ipow__ __iand__ __ixor__ __ior__ __ilshift__ __irshift__
  -- Conversions.
  __int__ __long__ __float__ __complex__ __oct__ __hex__ __coerce__
  -- Containers.
  __len__ __getitem__ __missing__ __setitem__ __delitem__ __contains__
  __iter__ __getslice__ __setslice__ __delslice__
  -- Module and class attribs.
  __doc__ __name__ __dict__ __file__ __path__ __module__ __bases__ __class__
  __self__
  -- Stdlib/sys.
  __builtin__ __future__ __main__ __import__ __stdin__ __stdout__ __stderr__
  -- Other.
  __debug__
]]))

-- Functions.
-- Built-in functions (Python 2 era list, e.g. `raw_input`, `xrange`).
lex:add_rule('function', token(lexer.FUNCTION, word_match[[
  abs all any apply basestring bool buffer callable chr classmethod cmp coerce
  compile complex copyright credits delattr dict dir divmod enumerate eval
  execfile exit file filter float frozenset getattr globals hasattr hash help
  hex id input int intern isinstance issubclass iter len license list locals
  long map max min object oct open ord pow property quit range raw_input
  reduce reload repr reversed round set setattr slice sorted staticmethod str
  sum super tuple type unichr unicode vars xrange zip
]]))

-- Constants: built-in exceptions, warnings, and singletons.
lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
  ArithmeticError AssertionError AttributeError BaseException
  DeprecationWarning EOFError Ellipsis EnvironmentError Exception False
  FloatingPointError FutureWarning GeneratorExit IOError ImportError
  ImportWarning IndentationError IndexError KeyError KeyboardInterrupt
  LookupError MemoryError NameError None NotImplemented NotImplementedError
  OSError OverflowError PendingDeprecationWarning ReferenceError RuntimeError
  RuntimeWarning StandardError StopIteration SyntaxError SyntaxWarning
  SystemError SystemExit TabError True TypeError UnboundLocalError
  UnicodeDecodeError UnicodeEncodeError UnicodeError UnicodeTranslateError
  UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
]]))

-- Self: highlighted like a type for visibility inside method bodies.
lex:add_rule('self', token('self', P('self')))
lex:add_style('self', lexer.STYLE_TYPE)

-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

-- Comments: `#` to end of line (escaped newlines continue the comment token).
lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0))

-- Strings. The optional unicode prefix accepts either case with either quote
-- style (previously only lowercase `u` worked before `'` and only uppercase
-- `U` before `"`, so e.g. U'x' was mis-lexed).
local sq_str = S('uU')^-1 * lexer.delimited_range("'", true)
local dq_str = S('uU')^-1 * lexer.delimited_range('"', true)
-- Triple-quoted strings; the closing delimiter is optional so an unterminated
-- docstring at EOF still lexes as a string.
local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1
local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-- TODO: raw_strs cannot end in single \.
-- Raw strings: mandatory r/R after an optional u/U, each in either case with
-- either quote style (previously `r` only worked before `'` and `R` only
-- before `"`). Escapes are disabled inside raw strings.
local raw_sq_str = S('uU')^-1 * S('rR') * lexer.delimited_range("'", false, true)
local raw_dq_str = S('uU')^-1 * S('rR') * lexer.delimited_range('"', false, true)
-- Triple-quoted forms are tried first so ''' is not consumed as '' + '.
lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str +
                                           sq_str + dq_str +
                                           raw_sq_str + raw_dq_str))

-- Numbers.
local dec = lexer.digit^1 * S('Ll')^-1 -- optional Python 2 long suffix
local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 -- allows 0b1010_0101
-- Octal: Python 3 `0o777`/`0O777` and legacy Python 2 `0777`.
local oct = '0' * S('oO')^-1 * R('07')^1 * S('Ll')^-1
-- Floats first so `1.5` is not split into integer `1` and `.5`; hex before
-- oct/dec so the leading `0` of `0x` is not consumed early.
local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))

-- Decorators: `@` through end of line, styled as preprocessor text.
lex:add_rule('decorator', token('decorator', '@' * lexer.nonnewline^0))
lex:add_style('decorator', lexer.STYLE_PREPROCESSOR)

-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))

return lex