-- Copyright 2006-2019 Mitchell mitchell.att.foicica.com. See License.txt.
-- C LPeg lexer.
|
||
|
|
||
|
local lexer = require('lexer')
|
||
|
local token, word_match = lexer.token, lexer.word_match
|
||
|
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
||
|
|
||
|
-- Lexer object for ANSI C; the rules and fold points below are attached to it.
local lex = lexer.new('ansi_c')

-- Whitespace.
-- A run of one or more space characters forms a single whitespace token.
local whitespace = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', whitespace)
|
||
|
|
||
|
-- Keywords.
-- The word list is held in a local so the rule registration stays on one line.
-- NOTE(review): the `-- C11.` line sits inside the long string; presumably
-- word_match ignores such comment lines -- confirm against lexer.lua.
local keywords = word_match[[
  auto break case const continue default do else extern for goto if inline
  register restrict return sizeof static switch typedef volatile while
  -- C11.
  _Alignas _Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local
]]
lex:add_rule('keyword', token(lexer.KEYWORD, keywords))
|
||
|
|
||
|
-- Types.
-- Built-in type names plus a handful of common standard library typedefs.
local type_words = word_match[[
  char double enum float int long short signed struct union unsigned void
  _Bool _Complex _Imaginary
  -- Stdlib types.
  ptrdiff_t size_t max_align_t wchar_t intptr_t uintptr_t intmax_t uintmax_t
]]
-- Fixed-width integer typedefs: [u]int[_least|_fast]<digits>_t.
-- The trailing negative lookahead requires the match to end at a word
-- boundary; without it, the leading part of a longer identifier such as
-- `int32_tfoo` would be highlighted as the type `int32_t`.
local word_end = -(lexer.alnum + '_')
local sized_int = P('u')^-1 * 'int' * (P('_least') + '_fast')^-1 * R('09')^1 *
  '_t' * word_end
lex:add_rule('type', token(lexer.TYPE, type_words + sized_int))
|
||
|
|
||
|
-- Constants.
-- NULL, the predefined preprocessor identifiers, and the errno.h error codes.
local constant_words = word_match[[
  NULL
  -- Preprocessor.
  __DATE__ __FILE__ __LINE__ __TIME__ __func__
  -- errno.h.
  E2BIG EACCES EADDRINUSE EADDRNOTAVAIL EAFNOSUPPORT EAGAIN EALREADY EBADF
  EBADMSG EBUSY ECANCELED ECHILD ECONNABORTED ECONNREFUSED ECONNRESET EDEADLK
  EDESTADDRREQ EDOM EDQUOT EEXIST EFAULT EFBIG EHOSTUNREACH EIDRM EILSEQ
  EINPROGRESS EINTR EINVAL EIO EISCONN EISDIR ELOOP EMFILE EMLINK EMSGSIZE
  EMULTIHOP ENAMETOOLONG ENETDOWN ENETRESET ENETUNREACH ENFILE ENOBUFS ENODATA
  ENODEV ENOENT ENOEXEC ENOLCK ENOLINK ENOMEM ENOMSG ENOPROTOOPT ENOSPC ENOSR
  ENOSTR ENOSYS ENOTCONN ENOTDIR ENOTEMPTY ENOTRECOVERABLE ENOTSOCK ENOTSUP
  ENOTTY ENXIO EOPNOTSUPP EOVERFLOW EOWNERDEAD EPERM EPIPE EPROTO
  EPROTONOSUPPORT EPROTOTYPE ERANGE EROFS ESPIPE ESRCH ESTALE ETIME ETIMEDOUT
  ETXTBSY EWOULDBLOCK EXDEV
]]
lex:add_rule('constants', token(lexer.CONSTANT, constant_words))
|
||
|
|
||
|
-- Strings.
-- Character and string literals, each with an optional wide-literal `L`
-- prefix. This rule is registered BEFORE the identifier rule on purpose:
-- rules are tried in the order they are added, and if identifiers came first
-- the `L` of L"..." would be consumed as a one-letter identifier, so the
-- optional prefix below could never match.
local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))

-- Identifiers.
-- Any remaining word. A word starting with `L` that is not followed by a
-- quote fails the string rule above and falls through to this one.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
|
||
|
|
||
|
-- Comments.
-- `//` comment: everything matched by lexer.nonnewline_esc after the slashes.
local line_comment = '//' * lexer.nonnewline_esc^0
-- `/* ... */` comment; the closing delimiter is optional, so an unterminated
-- comment runs to the end of the input.
local c_comment = P('/*') * (lexer.any - '*/')^0 * P('*/')^-1
-- `#if 0 ... #endif` region, treated as a comment; `#endif` is optional too.
local disabled_code = P('#if') * S(' \t')^0 * '0' * lexer.space *
  (lexer.any - '#endif')^0 * P('#endif')^-1
local block_comment = c_comment + disabled_code
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
|
||
|
|
||
|
-- Numbers.
-- Floats are tried before integers, as in the ordered choice below.
local number = lexer.float + lexer.integer
lex:add_rule('number', token(lexer.NUMBER, number))
|
||
|
|
||
|
-- Preprocessor.
-- Directive names other than `include`. `error` is part of the standard C
-- directive set and is listed so that `#error` is highlighted like the rest.
local preproc_word = word_match[[
  define elif else endif error if ifdef ifndef line pragma undef
]]
-- A directive starts with `#`, optionally followed by spaces or tabs.
local directive_start = '#' * S('\t ')^0
local directive = token(lexer.PREPROCESSOR, directive_start * preproc_word)
-- `#include` may be followed by whitespace and a <...> header name, which is
-- emitted as a string token.
local header = token(lexer.WHITESPACE, S('\t ')^1) *
  token(lexer.STRING, lexer.delimited_range('<>', true, true))
local include = token(lexer.PREPROCESSOR, directive_start * 'include') *
  header^-1
lex:add_rule('preprocessor', directive + include)
|
||
|
|
||
|
-- Operators.
-- Any single character from this set is one operator token. The literal lists
-- `~` twice; S() builds a character set, so the repeat has no effect.
local operator_chars = S('+-/*%<>~!=^&|?~:;,.()[]{}')
lex:add_rule('operator', token(lexer.OPERATOR, operator_chars))
|
||
|
|
||
|
-- Fold points.
-- Each conditional directive opens a fold that `#endif` closes.
for _, opener in ipairs{'#if', '#ifdef', '#ifndef'} do
  lex:add_fold_point(lexer.PREPROCESSOR, opener, '#endif')
end
-- Braces and block comments fold on their delimiters.
lex:add_fold_point(lexer.OPERATOR, '{', '}')
lex:add_fold_point(lexer.COMMENT, '/*', '*/')
-- Line comments are folded through the helper returned by
-- lexer.fold_line_comments.
lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//'))

return lex
|