-- iup-stack/iup/etc/lexlua/perl.lua
-- Copyright 2006-2019 Mitchell mitchell.att.foicica.com. See License.txt.
-- Perl LPeg lexer.

local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

local lex = lexer.new('perl')

-- Whitespace.
-- BUGFIX: this rule was registered as 'perl'; Scintillua requires the
-- whitespace rule to be named 'whitespace' so the framework recognizes it
-- (rule ordering and embedded-lexer handling depend on that name).
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords: control flow, declarations, special filehandles/blocks, and the
-- word-form operators.
local keywords = word_match[[
  STDIN STDOUT STDERR BEGIN END CHECK INIT
  require use
  break continue do each else elsif foreach for if last local my next our
  package return sub unless until while __FILE__ __LINE__ __PACKAGE__
  and or not eq ne lt gt le ge
]]
lex:add_rule('keyword', token(lexer.KEYWORD, keywords))
-- Markers: after a __DATA__ or __END__ marker, lex the entire remainder of
-- the input as a single comment token.
local data_marker = word_match[[__DATA__ __END__]]
lex:add_rule('marker', token(lexer.COMMENT, data_marker * lexer.any^0))
-- Built-in functions (Perl's standard function set).
local builtins = word_match[[
  abs accept alarm atan2 bind binmode bless caller chdir chmod chomp chop chown
  chr chroot closedir close connect cos crypt dbmclose dbmopen defined delete
  die dump each endgrent endhostent endnetent endprotoent endpwent endservent
  eof eval exec exists exit exp fcntl fileno flock fork format formline getc
  getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
  getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid getpriority
  getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid
  getservbyname getservbyport getservent getsockname getsockopt glob gmtime goto
  grep hex import index int ioctl join keys kill lcfirst lc length link listen
  localtime log lstat map mkdir msgctl msgget msgrcv msgsnd new oct opendir open
  ord pack pipe pop pos printf print prototype push quotemeta rand readdir read
  readlink recv redo ref rename reset reverse rewinddir rindex rmdir scalar
  seekdir seek select semctl semget semop send setgrent sethostent setnetent
  setpgrp setpriority setprotoent setpwent setservent setsockopt shift shmctl
  shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split
  sprintf sqrt srand stat study substr symlink syscall sysread sysseek system
  syswrite telldir tell tied tie time times truncate ucfirst uc umask undef
  unlink unpack unshift untie utime values vec wait waitpid wantarray warn write
]]
lex:add_rule('function', token(lexer.FUNCTION, builtins))
-- Closing counterparts for bracket-style opening delimiters.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}

-- Match-time function that consumes one delimited section (e.g. the body of
-- q/.../ or q{...}). Fails on alphanumeric delimiters; bracket delimiters are
-- matched with nesting support. An unterminated literal runs to end of input.
local literal_delimitted = P(function(input, index) -- for single delimiter sets
  local open = input:sub(index, index)
  if open:find('%w') then return nil end -- only non alpha-numerics
  local close = delimiter_matches[open]
  local patt
  if close then
    -- Handle nested delimiter/matches in strings.
    patt = lexer.delimited_range(open..close, false, false, true)
  else
    patt = lexer.delimited_range(open)
  end
  return lpeg.match(patt, input, index) or #input + 1
end)
-- Match-time function that consumes two delimited sections (e.g. s/foo/bar/
-- or tr{a}{b}). For plain delimiters the second section reuses the first
-- section's closing delimiter, so the second match starts one byte back; for
-- bracketed (), [], {}, <> notation the second section has its own pair,
-- possibly separated by whitespace.
local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
  local delimiter = input:sub(index, index)
  -- Only consider non-alpha-numerics and non-spaces as delimiters. The
  -- non-spaces are used to ignore operators like "-s".
  if not delimiter:find('[%w ]') then
    local patt
    if delimiter_matches[delimiter] then
      -- Handle nested delimiter/matches in strings.
      local s, e = delimiter, delimiter_matches[delimiter]
      patt = lexer.delimited_range(s..e, false, false, true)
    else
      patt = lexer.delimited_range(delimiter)
    end
    -- BUGFIX: first_match_pos/final_match_pos were accidental globals, and an
    -- unused 'match_pos' local was declared instead.
    local first_match_pos = lpeg.match(patt, input, index)
    if not first_match_pos then
      -- BUGFIX: guard against a failed first match; previously this would
      -- raise an error on 'first_match_pos - 1'. Treat as unterminated.
      return #input + 1
    end
    local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
    if not final_match_pos then -- using (), [], {}, or <> notation
      final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
    end
    return final_match_pos or #input + 1
  end
end)
-- Strings.
local sq_str = lexer.delimited_range("'")
local dq_str = lexer.delimited_range('"')
local cmd_str = lexer.delimited_range('`')
-- Heredocs: '<<' followed immediately by an identifier terminated by a
-- newline (or ';' then newline); the body runs until a line beginning with
-- that same identifier, or to end of input when unterminated.
local heredoc = '<<' * P(function(input, index)
  local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
  if s == index and delimiter then
    local end_heredoc = '[\n\r\f]+'
    -- 'delimiter' only contains [%w_] characters, so using it directly as a
    -- find() pattern is safe (no magic characters possible).
    local _, e = input:find(end_heredoc..delimiter, e)
    return e and e + 1 or #input + 1
  end
end)
-- q/qq quoted strings, qw word lists, qx command literals, tr/y translations.
local lit_str = 'q' * P('q')^-1 * literal_delimitted
local lit_array = 'qw' * literal_delimitted
local lit_cmd = 'qx' * literal_delimitted
local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
-- Bare /regex/: only after a character that makes a regex (not division)
-- plausible, per last_char_includes; trailing chars are regex modifiers.
local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
  lexer.delimited_range('/', true) * S('imosx')^0
local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-- Alternation order matters: longer/more specific literal forms are tried
-- within their token class before falling through to the regex class.
lex:add_rule('string',
  token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
    lit_array + lit_cmd + lit_tr) +
  token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

-- Comments: POD blocks ('=word' at line start, through the '=cut' line) and
-- '#' line comments; POD is tried first.
local pod_block = lexer.starts_line('=') * lexer.alpha *
  (lexer.any - lexer.newline * '=cut')^0 *
  (lexer.newline * '=cut')^-1
local hash_comment = '#' * lexer.nonnewline_esc^0
lex:add_rule('comment', token(lexer.COMMENT, pod_block + hash_comment))

-- Numbers.
lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
-- Variables.
-- Special variables after '$': control vars like $^W (optional letter),
-- punctuation vars (e.g. $_, $!, $/), '$:' but not '$::' (package separator),
-- '$$' when not followed by a word (process id vs. dereference), and numeric
-- capture vars $1, $2, ...
local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
  S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
  ':' * (lexer.any - ':') +
  P('$') * -lexer.word +
  lexer.digit^1)
-- Ordinary scalars/arrays/hashes ($x, @x, %x), last-index '$#x' (with
-- optional '$' dereference chain), or a bare '$#'.
local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}')))

-- Fold points: bracket/brace pairs and runs of '#' line comments.
lex:add_fold_point(lexer.OPERATOR, '[', ']')
lex:add_fold_point(lexer.OPERATOR, '{', '}')
lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#'))

return lex