-- Copyright 2015-2019 Alejandro Baez (https://keybase.io/baez). See License.txt.
-- Rust LPeg lexer.

local lexer = require("lexer")
local token, word_match = lexer.token, lexer.word_match
local B, P, R, S, V = lpeg.B, lpeg.P, lpeg.R, lpeg.S, lpeg.V
local C, Cmt = lpeg.C, lpeg.Cmt

local lex = lexer.new('rust')

-- Whitespace.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))

-- Keywords.
-- https://github.com/rust-lang/rust/blob/stable/src/libsyntax_pos/symbol.rs
-- The `-B('r#')` guard rejects a word that is directly preceded by `r#`, the
-- prefix the identifier pattern below accepts in front of a word.
lex:add_rule('keyword', token(lexer.KEYWORD, -B('r#') * word_match[[
  Self abstract as async auto await become box break catch const continue crate
  default do dyn else enum extern false final fn for if impl in let loop macro
  match mod move mut override priv pub ref return self static struct super
  trait true try type typeof union unsafe unsized use virtual where while yield
]]))

-- Macro names: a word immediately followed by `!`.
lex:add_rule('macro', token(lexer.FUNCTION, lexer.word * S("!")))

-- Library types: an uppercase letter followed by one or more lowercase
-- letters or decimal numbers.
lex:add_rule('library', token(lexer.LABEL,
  lexer.upper * (lexer.lower + lexer.dec_num)^1))

-- Numbers.
-- An identifier is a word with an optional `r#` prefix.
local identifier = P('r#')^-1 * lexer.word
local digit = lexer.digit
-- A digit followed by any mix of digits and underscores.
local decimal_literal = digit * (digit + '_')^0

-- Returns a pattern for a digit run that may start with underscores: any
-- number of `_`, one mandatory `digit_patt`, then digits and underscores.
local function integer_suffix(digit_patt)
  return P('_')^0 * digit_patt * (digit_patt + '_')^0
end

-- Returns a pattern that optionally matches `patt` and always produces one
-- capture (the empty string when `patt` did not match).
local function opt_cap(patt)
  return C(patt^-1)
end

-- A float is a decimal literal followed by at least one of: a fractional
-- part, an exponent, or an `f32`/`f64` suffix. The match-time function rejects
-- the case where all three optional parts are empty. The alternative accepts
-- a bare trailing `.` as long as it is not followed by `.`, `_` or an
-- identifier.
local float = decimal_literal *
  (Cmt(opt_cap('.' * decimal_literal) *
       opt_cap(S('eE') * S('+-')^-1 * integer_suffix(digit)) *
       opt_cap(P('f32') + 'f64'),
       function(input, index, decimals, exponent, suffix)
         return decimals ~= "" or exponent ~= "" or suffix ~= ""
       end) +
   '.' * -(S('._') + identifier))

-- Returns a pattern for `prefix` followed by digits drawn from `digit_patt`.
local function prefixed_integer(prefix, digit_patt)
  return P(prefix) * integer_suffix(digit_patt)
end

-- Binary, octal, hexadecimal or decimal literal with an optional `i`/`u`
-- suffix followed by a width.
local integer = (prefixed_integer('0b', S('01')) +
                 prefixed_integer('0o', R('07')) +
                 prefixed_integer('0x', lexer.xdigit) +
                 decimal_literal) *
                (S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1

-- Floats are tried first so that an integer prefix of a float is not taken.
lex:add_rule('number', token(lexer.NUMBER, float + integer))

-- Types.
-- Built-in type names (the word list also contains `()`).
lex:add_rule('type', token(lexer.TYPE, word_match[[
  () bool isize usize char str u8 u16 u32 u64 u128 i8 i16 i32 i64 i128 f32 f64
]]))

-- Strings.
-- Single- and double-quoted strings, each with an optional `b` prefix.
local sq_str = P('b')^-1 * lexer.delimited_range("'", true)
local dq_str = P('b')^-1 * lexer.delimited_range('"')
-- Raw strings: optional `b`, then `r`, any number of `#`, and an opening `"`.
-- The match-time function does a plain-text search for the closing `"`
-- followed by the same run of `#` and resumes just past it, or at the end of
-- the input when no terminator is found.
local raw_str = Cmt(P('b')^-1 * P('r') * C(P('#')^0) * '"',
  function(input, index, hashes)
    local _, e = input:find('"'..hashes, index, true)
    return (e or #input) + 1
  end)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str))

-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, identifier))

-- Comments.
local line_comment = '//' * lexer.nonnewline_esc^0
local block_comment = lexer.nested_pair('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))

-- Attributes.
-- This rule must be added before the operator rule: rules are tried in the
-- order they are added, and the operator set contains '#', so it would
-- otherwise consume the leading '#' of every attribute.
lex:add_rule('preprocessor', token(lexer.PREPROCESSOR,
  "#[" * (lexer.nonnewline - ']')^0 * P("]")^-1))

-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
  S('+-/*%<>!=`^~@&|?#~:;,.()[]{}')))

-- Fold points.
lex:add_fold_point(lexer.COMMENT, '/*', '*/')
lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//'))
lex:add_fold_point(lexer.OPERATOR, '(', ')')
lex:add_fold_point(lexer.OPERATOR, '{', '}')

return lex