122 lines
4.2 KiB
Lua
Executable File
122 lines
4.2 KiB
Lua
Executable File
-- Copyright 2006-2019 Mitchell mitchell.att.foicica.com. See License.txt.
-- YAML LPeg lexer.
-- It does not keep track of indentation perfectly.
|
|
|
|
local lexer = require('lexer')
|
|
local token, word_match = lexer.token, lexer.word_match
|
|
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
|
|
|
-- Lexer module table returned at the end of the file; `_NAME` labels it 'yaml'.
local M = {_NAME = 'yaml'}
|
|
|
|
-- Whitespace.
-- Indentation is tokenized one character at a time: a space is ordinary
-- whitespace, while a tab gets the 'indent_error' token. The whole pattern is
-- guarded by a lookahead on lexer.starts_line (presumably so it only applies
-- to blanks at the beginning of a line -- confirm against lexer.lua).
local indent_space = token(lexer.WHITESPACE, ' ')
local indent_tab = token('indent_error', '\t')
local indent = #lexer.starts_line(S(' \t')) * (indent_space + indent_tab)^1
-- Everything else: a run of spaces/tabs, or a run of newlines.
local blank_run = S(' \t')^1
local ws = token(lexer.WHITESPACE, blank_run + lexer.newline^1)
|
|
|
|
-- Comments.
-- A '#' followed by whatever lexer.nonnewline accepts, zero or more times.
local comment_text = '#' * lexer.nonnewline^0
local comment = token(lexer.COMMENT, comment_text)
|
|
|
|
-- Strings.
-- YAML single-quoted scalars have no backslash escapes (a quote is written by
-- doubling it), so that range is built with escapes disabled. Otherwise a
-- value such as 'C:\dir\' would swallow its own closing quote. A doubled quote
-- simply lexes as two adjacent string tokens.
-- Double-quoted scalars keep the default backslash-escape handling.
-- Note: the local name `string` shadows the standard library table for the
-- rest of this file; string methods called with `:` are unaffected.
local sq_string = lexer.delimited_range("'", false, true)
local dq_string = lexer.delimited_range('"')
local string = token(lexer.STRING, sq_string + dq_string)
|
|
|
|
-- Numbers.
-- LPeg's choice operator is ordered. Assuming lexer.dec_num accepts any run of
-- digits, trying it first would consume just the leading '0' of '0x1F' or
-- '0o17' and leave the hex/octal alternatives unreachable. The prefixed forms
-- are therefore tried before plain decimals.
local octal_num = '0' * S('oO') * R('07')^1
local integer = lexer.hex_num + octal_num + lexer.dec_num
-- '.inf' and '.nan'; the third word_match argument is `true`
-- (presumably case-insensitive matching -- confirm against lexer.lua).
local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
local number = token(lexer.NUMBER, special_num + lexer.float + integer)
|
|
|
|
-- Timestamps.
-- A date (YYYY-M[M]-D[D]) optionally followed by a time of day, which may in
-- turn carry a fractional part and a timezone.
local ts_digit = lexer.digit
local ts_date = ts_digit * ts_digit * ts_digit * ts_digit * -- year
  '-' * ts_digit * ts_digit^-1 * -- month
  '-' * ts_digit * ts_digit^-1 -- day
-- Optional separator between date and time: blanks, or a single 't'/'T'.
local ts_separator = (S(' \t')^1 + S('tT'))^-1
local ts_clock = ts_digit * ts_digit^-1 * -- hour
  ':' * ts_digit * ts_digit * -- minute
  ':' * ts_digit * ts_digit -- second
-- Optional fraction: a '.' followed by any number of digits (possibly none).
local ts_fraction = ('.' * ts_digit^0)^-1
-- Optional timezone: 'Z', or a signed hour offset with optional ':MM'.
local ts_offset = S(' \t')^0 * S('-+') * ts_digit * ts_digit^-1 *
  (':' * ts_digit * ts_digit)^-1
local ts_zone = ('Z' + ts_offset)^-1
local ts_time = ts_separator * ts_clock * ts_fraction * ts_zone
local ts = token('timestamp', ts_date * ts_time^-1)
|
|
|
|
-- Constants.
-- The words null/true/false, matched through lexer.word_match with its third
-- argument set to `true`.
local constant_words = word_match({'null', 'true', 'false'}, nil, true)
local constant = token(lexer.CONSTANT, constant_words)
|
|
|
|
-- Types.
-- Either '!!' followed by one of the listed type names, or '!' followed by a
-- '<'...'>' delimited range.
-- Note: the local name `type` shadows Lua's built-in type() for the rest of
-- this file.
local type_names = word_match({
  -- Collection types.
  'map', 'omap', 'pairs', 'set', 'seq',
  -- Scalar types.
  'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
  'value', 'yaml'
}, nil, true)
local bracketed_tag = '!' * lexer.delimited_range('<>')
local type = token(lexer.TYPE, '!!' * type_names + bracketed_tag)
|
|
|
|
-- Document boundaries.
-- The markers '---' and '...', wrapped in lexer.starts_line.
local boundary_marker = P('---') + '...'
local doc_bounds = token('document', lexer.starts_line(boundary_marker))
|
|
|
|
-- Directives.
-- A '%' wrapped in lexer.starts_line, followed by at least one non-newline
-- character.
local directive_start = lexer.starts_line('%')
local directive = token('directive', directive_start * lexer.nonnewline^1)
|
|
|
|
-- A word starts with a letter, or with a '-' that is not followed by
-- whitespace, and continues with any number of alphanumerics or '-'.
local word_head = lexer.alpha + '-' * -lexer.space
local word_tail = (lexer.alnum + '-')^0
local word = word_head * word_tail
|
|
|
|
-- Keys and literals.
-- Key/value separator: optional blanks, a ':', then whitespace or end of input.
local colon = S(' \t')^0 * ':' * (lexer.space + -1)

-- Match-time check run once a candidate key has been consumed up to `index`.
-- It takes the text of the current line from its start up to (not including)
-- `index` and rejects the candidate if that text already contains a run of
-- alphanumerics/'-' immediately followed by ':'.
-- Returns `index` to accept the match at the current position, `false` to fail.
local function no_earlier_key(input, index)
  -- The key pattern consumes at least one lexer.nonnewline character before
  -- this runs, so a line fragment is expected to exist here.
  local line = input:sub(1, index - 1):match('[^\r\n]+$')
  if line:find('[%w-]+:') then
    return false
  end
  return index
end

-- A key begins like a word, runs up to (but excludes) the separator, and must
-- be followed by one.
local key_text = (lexer.nonnewline - colon)^1
local key = token(lexer.KEYWORD,
  #word * key_text * #colon * P(no_earlier_key))
|
|
-- Plain value: begins like a word and runs to the end of the line, stopping
-- before optional whitespace followed by one of ',', ']' or '}'.
local value_stop = lexer.space^0 * S(',]}')
local value = #word * (lexer.nonnewline - value_stop)^1

-- Match-time function that finds where a block scalar ends. `index` is the
-- position just after the block header line.
-- The number of leading spaces on the first following line sets the block's
-- indentation level. The scan then walks the remaining non-empty lines and
-- stops at the first one that either
--   * is indented by fewer spaces than that level and is not a lone space, or
--   * comes after the first line when the level is zero.
-- Returns the absolute position of that line, or the position just past the
-- end of input when no such line exists.
local function block_scalar_end(input, index)
  local tail = input:sub(index)
  local level = #tail:match('^( *)')
  -- line_start: offset in `tail` where the line's leading spaces begin;
  -- text_start: offset of the first character after those spaces.
  for line_start, text_start, text in tail:gmatch('() *()([^\r\n]+)') do
    local dedented = text_start - line_start < level and text ~= ' '
    local past_first_unindented = level == 0 and line_start > 1
    if dedented or past_first_unindented then
      return index + line_start - 1
    end
  end
  return #input + 1
end

-- Block scalar: '|' or '>', an optional '+'/'-', then a newline or end of
-- input, with the body consumed by the scanner above.
local block_header = S('|>') * S('+-')^-1 * (lexer.newline + -1)
local block = block_header * block_scalar_end
local literal = token('literal', value + block)
|
|
|
|
-- Indicators.
-- '&name' is an anchor and '*name' is an alias.
local anchor = token(lexer.LABEL, P('&') * word)
local alias = token(lexer.VARIABLE, P('*') * word)
-- '!name' with an optional trailing '!'.
local tag = token('tag', P('!') * word * P('!')^-1)
-- '@' or '`' followed by a word is flagged with the ERROR token.
local reserved = token(lexer.ERROR, S('@`') * word)
-- Single-character indicators.
local indicator_chars = token(lexer.OPERATOR, S('-?:,[]{}!'))
|
|
|
|
-- Rule table: {name, pattern} pairs in the order given here.
-- NOTE(review): the framework presumably tries rules top to bottom, so the
-- order is significant (e.g. 'key' before 'literal') -- confirm in lexer.lua.
local indicator = tag + indicator_chars + alias + anchor + reserved
M._rules = {
  {'indent', indent},
  {'whitespace', ws},
  {'comment', comment},
  {'doc_bounds', doc_bounds},
  {'key', key},
  {'string', string},
  {'literal', literal},
  {'timestamp', ts},
  {'number', number},
  {'constant', constant},
  {'type', type},
  {'indicator', indicator},
  {'directive', directive},
}
|
|
|
|
-- Styles for the custom token names this lexer introduces.
local custom_styles = {}
custom_styles.indent_error = 'back:%(color.red)'
custom_styles.document = lexer.STYLE_CONSTANT
custom_styles.literal = lexer.STYLE_DEFAULT
custom_styles.timestamp = lexer.STYLE_NUMBER
custom_styles.tag = lexer.STYLE_CLASS
custom_styles.directive = lexer.STYLE_PREPROCESSOR
M._tokenstyles = custom_styles
|
|
|
|
-- NOTE(review): presumably asks the lexer framework to fold by indentation
-- level instead of explicit fold points -- confirm against lexer.lua.
M._FOLDBYINDENTATION = true
|
|
|
|
-- Return the assembled lexer table to whoever loaded this file.
return M
|