-- Source file: iup-stack/iup/etc/lexlua/python.lua
-- (105 lines, 4.4 KiB, Lua; last changed 2023-02-20 16:44:45 +00:00)
-- Copyright 2006-2019 Mitchell mitchell.att.foicica.com. See License.txt.
-- Python LPeg lexer.
local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S
-- Create the 'python' lexer; the option table enables fold_by_indentation.
local lex_options = {fold_by_indentation = true}
local lex = lexer.new('python', lex_options)
-- Whitespace: a run of one or more space characters is a single token.
local whitespace = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', whitespace)
-- Keywords.
-- Statement keywords from both Python 2 (`exec`, `print`) and Python 3
-- (`nonlocal`, `async`, `await`), followed by the special double-underscore
-- names grouped by purpose. Each name is listed once.
-- NOTE(review): the `--` group headings below sit inside the long string and
-- so are handed to word_match as part of the list; this presumably relies on
-- word_match skipping `--` comments. Confirm against lexer.lua.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
and as assert async await break class continue def del elif else except exec
finally for from global if import in is lambda nonlocal not or pass print raise
return try while with yield
-- Descriptors/attr access.
__get__ __set__ __delete__ __slots__
-- Class.
__new__ __init__ __del__ __repr__ __str__ __cmp__ __index__ __lt__ __le__
__gt__ __ge__ __eq__ __ne__ __hash__ __nonzero__ __getattr__ __getattribute__
__setattr__ __delattr__ __call__
-- Operator.
__add__ __sub__ __mul__ __div__ __floordiv__ __mod__ __divmod__ __pow__
__and__ __xor__ __or__ __lshift__ __rshift__ __neg__ __pos__
__abs__ __invert__ __iadd__ __isub__ __imul__ __idiv__ __ifloordiv__ __imod__
__ipow__ __iand__ __ixor__ __ior__ __ilshift__ __irshift__
-- Conversions.
__int__ __long__ __float__ __complex__ __oct__ __hex__ __coerce__
-- Containers.
__len__ __getitem__ __missing__ __setitem__ __delitem__ __contains__ __iter__
__getslice__ __setslice__ __delslice__
-- Module and class attribs.
__doc__ __name__ __dict__ __file__ __path__ __module__ __bases__ __class__
__self__
-- Stdlib/sys.
__builtin__ __future__ __main__ __import__ __stdin__ __stdout__ __stderr__
-- Other.
__debug__
]]))
-- Functions.
-- Names of built-in functions, styled as lexer.FUNCTION. The list is kept in
-- alphabetical order and wrapped by initial letter.
local builtin_functions = [[
abs all any apply
basestring bool buffer
callable chr classmethod cmp coerce compile complex copyright credits
delattr dict dir divmod
enumerate eval execfile exit
file filter float frozenset
getattr globals
hasattr hash help hex
id input int intern isinstance issubclass iter
len license list locals long
map max min
object oct open ord
pow property quit
range raw_input reduce reload repr reversed round
set setattr slice sorted staticmethod str sum super
tuple type unichr unicode vars xrange zip
]]
lex:add_rule('function', token(lexer.FUNCTION, word_match(builtin_functions)))
-- Constants.
-- Built-in constant names, styled as lexer.CONSTANT. The list is arranged as
-- three runs: the singleton values, the warning categories, and the remaining
-- exception classes (each run alphabetical).
local builtin_constants = [[
Ellipsis False None NotImplemented True
DeprecationWarning FutureWarning ImportWarning PendingDeprecationWarning
RuntimeWarning SyntaxWarning UnicodeWarning UserWarning Warning
ArithmeticError AssertionError AttributeError BaseException EOFError
EnvironmentError Exception FloatingPointError GeneratorExit IOError
ImportError IndentationError IndexError KeyError KeyboardInterrupt
LookupError MemoryError NameError NotImplementedError OSError OverflowError
ReferenceError RuntimeError StandardError StopIteration SyntaxError
SystemError SystemExit TabError TypeError UnboundLocalError
UnicodeDecodeError UnicodeEncodeError UnicodeError UnicodeTranslateError
ValueError ZeroDivisionError
]]
lex:add_rule('constant', token(lexer.CONSTANT, word_match(builtin_constants)))
-- Self.
-- Match `self` only as a complete word. A bare P('self') has no word boundary
-- and therefore also matches the first four characters of longer names such
-- as `selfish` or `self_test`, splitting them into a 'self' token followed by
-- an identifier. word_match only accepts whole words.
lex:add_rule('self', token('self', word_match[[self]]))
-- The custom 'self' token is displayed with the type style.
lex:add_style('self', lexer.STYLE_TYPE)
-- Strings.
-- This rule is registered ahead of the identifier rule on purpose: rules are
-- tried in the order they are added, so with identifiers first a literal
-- prefix such as `u`, `r` or `rb` is consumed as an identifier and never
-- becomes part of the string token.
local sq_str = lexer.delimited_range("'", true)
local dq_str = lexer.delimited_range('"', true)
local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1
local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-- Optional literal prefix, accepted in either case and with either quote
-- style: r, u, b, f and the two-letter combinations with r (rb, br, rf, fr,
-- ur).
local str_prefix = S('rR') * S('bBfF')^-1 + S('uUbBfF') * S('rR')^-1
-- Raw strings reuse the escape-aware patterns above: in Python a backslash in
-- front of the quote keeps a raw literal open as well, so no separate
-- escape-free pattern is needed. Triple-quoted forms are tried first so that
-- ''' and """ are not read as an empty string followed by a quote.
lex:add_rule('string', token(lexer.STRING, str_prefix^-1 *
  (triple_sq_str + triple_dq_str + sq_str + dq_str)))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-- A comment runs from `#` to the end of the line. Python does not let a
-- trailing backslash continue a comment onto the next line, so the plain
-- lexer.nonnewline pattern is used rather than lexer.nonnewline_esc.
lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
-- Numbers.
-- Integer literals: decimal, binary (0b/0B with optional `_` separators), hex
-- (lexer.hex_num) and octal in both the Python 2 form (017) and the Python 3
-- form (0o17/0O17). A trailing l/L marks a Python 2 long; a trailing j/J marks
-- an imaginary literal and is accepted after decimal and float forms.
local dec = lexer.digit^1 * S('LlJj')^-1
local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
local oct = '0' * S('oO')^-1 * R('07')^1 * S('Ll')^-1
-- The prefixed forms are tried before plain decimal so that the leading `0`
-- is not taken as a complete decimal number on its own.
local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
-- Floats are tried first so that `1.5` is not split at the decimal point.
lex:add_rule('number', token(lexer.NUMBER,
  lexer.float * S('jJ')^-1 + integer))
-- Decorators.
-- An `@` together with the rest of its line forms one 'decorator' token,
-- which is displayed with the preprocessor style.
local decorator = token('decorator', '@' * lexer.nonnewline^0)
lex:add_rule('decorator', decorator)
lex:add_style('decorator', lexer.STYLE_PREPROCESSOR)
-- Operators.
-- Every character in this set is emitted as its own operator token.
local operator_chars = S('!%^&*()[]{}-=+/|:;.,?<>~`')
lex:add_rule('operator', token(lexer.OPERATOR, operator_chars))
-- Hand the fully configured lexer back to the caller of this module.
return lex