-- Copyright 2006-2019 Mitchell mitchell.att.foicica.com. See License.txt.
-- Python LPeg lexer.

-- Pull in the lexer module and alias the helpers used by the rules below.
local lexer = require 'lexer'
local token = lexer.token
local word_match = lexer.word_match
-- `lpeg` is not required in this file; it is assumed to be available as a
-- global by the time the lexer is loaded.
local P = lpeg.P
local R = lpeg.R
local S = lpeg.S

-- Create the 'python' lexer with the fold_by_indentation option enabled.
local lex = lexer.new('python', {
  fold_by_indentation = true
})

-- Whitespace.
-- One or more consecutive space characters form a single whitespace token.
local whitespace_patt = lexer.space^1
lex:add_rule('whitespace', token(lexer.WHITESPACE, whitespace_patt))

-- Keywords.
-- Reserved words from both Python 2 (exec, print) and Python 3 (nonlocal,
-- async, await), followed by special "dunder" names grouped by purpose. Each
-- name is listed once; the '--' lines inside the list are the file's existing
-- in-list comment convention.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
  and as assert async await break class continue def del elif else except exec
  finally for from global if import in is lambda nonlocal not or pass print
  raise return try while with yield
  -- Descriptors/attr access.
  __get__ __set__ __delete__ __slots__
  -- Class.
  __new__ __init__ __del__ __repr__ __str__ __cmp__ __index__ __lt__ __le__
  __gt__ __ge__ __eq__ __ne__ __hash__ __nonzero__ __getattr__ __getattribute__
  __setattr__ __delattr__ __call__
  -- Operator.
  __add__ __sub__ __mul__ __div__ __floordiv__ __mod__ __divmod__ __pow__
  __and__ __xor__ __or__ __lshift__ __rshift__ __neg__ __pos__
  __abs__ __invert__ __iadd__ __isub__ __imul__ __idiv__ __ifloordiv__ __imod__
  __ipow__ __iand__ __ixor__ __ior__ __ilshift__ __irshift__
  -- Conversions.
  __int__ __long__ __float__ __complex__ __oct__ __hex__ __coerce__
  -- Containers.
  __len__ __getitem__ __missing__ __setitem__ __delitem__ __contains__ __iter__
  __getslice__ __setslice__ __delslice__
  -- Module and class attribs.
  __doc__ __name__ __dict__ __file__ __path__ __module__ __bases__ __class__
  __self__
  -- Stdlib/sys.
  __builtin__ __future__ __main__ __import__ __stdin__ __stdout__ __stderr__
  -- Other.
  __debug__
]]))

-- Functions.
-- Built-in function names, including Python 2 names such as raw_input, xrange
-- and unichr. The list holds only the names themselves, separated by
-- whitespace.
lex:add_rule('function', token(lexer.FUNCTION, word_match[[
  abs all any apply basestring bool buffer callable chr classmethod cmp coerce
  compile complex copyright credits delattr dict dir divmod enumerate eval
  execfile exit file filter float frozenset getattr globals hasattr hash help
  hex id input int intern isinstance issubclass iter len license list locals
  long map max min object oct open ord pow property quit range raw_input reduce
  reload repr reversed round set setattr slice sorted staticmethod str sum super
  tuple type unichr unicode vars xrange zip
]]))

-- Constants.
-- Built-in singletons (None, True, False, Ellipsis, NotImplemented) together
-- with the built-in exception and warning class names. The list holds only
-- the names themselves, separated by whitespace.
lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
  ArithmeticError AssertionError AttributeError BaseException DeprecationWarning
  EOFError Ellipsis EnvironmentError Exception False FloatingPointError
  FutureWarning GeneratorExit IOError ImportError ImportWarning IndentationError
  IndexError KeyError KeyboardInterrupt LookupError MemoryError NameError None
  NotImplemented NotImplementedError OSError OverflowError
  PendingDeprecationWarning ReferenceError RuntimeError RuntimeWarning
  StandardError StopIteration SyntaxError SyntaxWarning SystemError SystemExit
  TabError True TypeError UnboundLocalError UnicodeDecodeError
  UnicodeEncodeError UnicodeError UnicodeTranslateError UnicodeWarning
  UserWarning ValueError Warning ZeroDivisionError
]]))

-- Self.
-- Match `self` only as a complete word. A bare P('self') would also claim the
-- first four characters of longer identifiers such as `selfish` or `self_id`,
-- because this rule is added before the identifier rule.
lex:add_rule('self', token('self', word_match[[self]]))
-- The custom 'self' token is displayed with the type style.
lex:add_style('self', lexer.STYLE_TYPE)

-- Identifiers.
-- Any word not claimed by an earlier rule is tagged as a plain identifier.
local identifier_patt = lexer.word
lex:add_rule('identifier', token(lexer.IDENTIFIER, identifier_patt))

-- Comments.
-- A '#' followed by any run of characters matched by lexer.nonnewline_esc.
local line_comment = P('#') * lexer.nonnewline_esc^0
lex:add_rule('comment', token(lexer.COMMENT, line_comment))

-- Strings.
-- Prefix letters are accepted in either case for both quote styles.
-- NOTE(review): the identifier rule is added before this one, so a leading
-- prefix letter may already have been consumed as an identifier -- confirm
-- against Scintillua's rule ordering.
local u_prefix = S('uU')^-1  -- optional unicode prefix
local r_prefix = S('rR')     -- raw-string prefix
-- Ordinary quoted strings (second argument: single-line only).
local sq_str = u_prefix * lexer.delimited_range("'", true)
local dq_str = u_prefix * lexer.delimited_range('"', true)
-- Triple-quoted strings. The closing delimiter is optional, so an unterminated
-- string runs to the end of the input.
local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1
local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-- Raw strings (third argument: backslash does not escape).
-- TODO: raw_strs cannot end in single \.
local raw_sq_str = u_prefix * r_prefix * lexer.delimited_range("'", false, true)
local raw_dq_str = u_prefix * r_prefix * lexer.delimited_range('"', false, true)
-- Triple-quoted forms are tried first so that ''' and """ are not read as an
-- empty string followed by a stray quote.
lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str +
                                           sq_str + dq_str + raw_sq_str +
                                           raw_dq_str))

-- Numbers.
-- Decimal digits with an optional Python 2 long suffix (l/L).
local dec = lexer.digit^1 * S('Ll')^-1
-- Binary literal: 0b or 0B, with optional '_' separators between digit groups.
local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
-- Octal literal: Python 2 form (017) or Python 3 form (0o17 / 0O17).
local oct = '0' * S('oO')^-1 * R('07')^1 * S('Ll')^-1
-- Prefixed forms are tried before plain decimal so that a leading '0' is not
-- taken as a complete decimal number on its own.
local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
-- Floats are tried before integers.
lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))

-- Decorators.
-- An '@' plus every following non-newline character is one 'decorator' token.
local decorator_patt = P('@') * lexer.nonnewline^0
lex:add_rule('decorator', token('decorator', decorator_patt))
-- The custom 'decorator' token is displayed with the preprocessor style.
lex:add_style('decorator', lexer.STYLE_PREPROCESSOR)

-- Operators.
-- Each operator or punctuation character is emitted as its own token.
local operator_chars = S('!%^&*()[]{}-=+/|:;.,?<>~`')
lex:add_rule('operator', token(lexer.OPERATOR, operator_chars))

-- Hand the configured lexer back to whoever loads this file.
return lex