// Scintilla source code edit control /** @file LexDataflex.cxx ** Lexer for DataFlex. ** Based on LexPascal.cxx ** Written by Wil van Antwerpen, June 2019 **/ /* // The License.txt file describes the conditions under which this software may be distributed. A few words about features of LexDataflex... Generally speaking LexDataflex tries to support all available DataFlex features (up to DataFlex 19.1 at this time). ~ FOLDING: Folding is supported in the following cases: - Folding of stream-like comments - Folding of groups of consecutive line comments - Folding of preprocessor blocks (the following preprocessor blocks are supported: #IFDEF, #IFNDEF, #ENDIF and #HEADER / #ENDHEADER blocks), - Folding of code blocks on appropriate keywords (the following code blocks are supported: "begin, struct, type, case / end" blocks, class & object declarations and interface declarations) Remarks: - We pass 4 arrays to the lexer: 1. The DataFlex keyword list, these are normal DataFlex keywords 2. The Scope Open list, for example, begin / procedure / while 3. The Scope Close list, for example, end / end_procedure / loop 4. Operator list, for ex. + / - / * / Lt / iand These lists are all mutually exclusive, scope open words should not be in the keyword list and vice versa - Folding of code blocks tries to handle all special cases in which folding should not occur. ~ KEYWORDS: The list of keywords that can be used in dataflex.properties file (up to DataFlex 19.1): - Keywords: .. snipped .. see dataflex.properties file. */ #include #include #include #include #include #include #include "ILexer.h" #include "Scintilla.h" #include "SciLexer.h" #include "WordList.h" #include "LexAccessor.h" #include "Accessor.h" #include "StyleContext.h" #include "CharacterSet.h" #include "LexerModule.h" using namespace Scintilla; static void GetRangeLowered(Sci_PositionU start, Sci_PositionU end, Accessor &styler, char *s, Sci_PositionU len) { Sci_PositionU i = 0; while ((i < end - start + 1) && (i < len-1)) { s[i] = static_cast(tolower(styler[start + i])); i++; } s[i] = '\0'; } static void GetForwardRangeLowered(Sci_PositionU start, CharacterSet &charSet, Accessor &styler, char *s, Sci_PositionU len) { Sci_PositionU i = 0; while ((i < len-1) && charSet.Contains(styler.SafeGetCharAt(start + i))) { s[i] = static_cast(tolower(styler.SafeGetCharAt(start + i))); i++; } s[i] = '\0'; } enum { stateInICode = 0x1000, stateSingleQuoteOpen = 0x2000, stateDoubleQuoteOpen = 0x4000, stateFoldInPreprocessor = 0x0100, stateFoldInCaseStatement = 0x0200, stateFoldInPreprocessorLevelMask = 0x00FF, stateFoldMaskAll = 0x0FFF }; static bool IsFirstDataFlexWord(Sci_Position pos, Accessor &styler) { Sci_Position line = styler.GetLine(pos); Sci_Position start_pos = styler.LineStart(line); for (Sci_Position i = start_pos; i < pos; i++) { char ch = styler.SafeGetCharAt(i); if (!(ch == ' ' || ch == '\t')) return false; } return true; } inline bool IsADataFlexField(int ch) { return (ch == '.'); } size_t _strnlen(const char *s, size_t max) { const char *end = (const char*)memchr((void *)s, 0, max); return end ? (size_t)(end - s) : max; } static void ClassifyDataFlexWord(WordList *keywordlists[], StyleContext &sc, Accessor &styler) { WordList& keywords = *keywordlists[0]; WordList& scopeOpen = *keywordlists[1]; WordList& scopeClosed = *keywordlists[2]; WordList& operators = *keywordlists[3]; char s[100]; int oldState; int newState; size_t tokenlen; oldState = sc.state; newState = oldState; sc.GetCurrentLowered(s, sizeof(s)); tokenlen = _strnlen(s,sizeof(s)); if (keywords.InList(s)) { // keywords in DataFlex can be used as table column names (file.field) and as such they // should not be characterized as a keyword. So test for that. // for ex. somebody using date as field name. if (!IsADataFlexField(sc.GetRelative(-static_cast(tokenlen+1)))) { newState = SCE_DF_WORD; } } if (oldState == newState) { if ((scopeOpen.InList(s) || scopeClosed.InList(s)) && (strcmp(s, "for") != 0) && (strcmp(s, "repeat") != 0)) { // scope words in DataFlex can be used as table column names (file.field) and as such they // should not be characterized as a scope word. So test for that. // for ex. somebody using procedure for field name. if (!IsADataFlexField(sc.GetRelative(-static_cast(tokenlen+1)))) { newState = SCE_DF_SCOPEWORD; } } // no code folding on the next words, but just want to paint them like keywords (as they are) (??? doesn't the code to the opposite?) if (strcmp(s, "if") == 0 || strcmp(s, "ifnot") == 0 || strcmp(s, "case") == 0 || strcmp(s, "else") == 0 ) { newState = SCE_DF_SCOPEWORD; } } if (oldState != newState && newState == SCE_DF_WORD) { // a for loop must have for at the start of the line, for is also used in "define abc for 123" if ( (strcmp(s, "for") == 0) && (IsFirstDataFlexWord(sc.currentPos-3, styler)) ) { newState = SCE_DF_SCOPEWORD; } } if (oldState != newState && newState == SCE_DF_WORD) { // a repeat loop must have repeat at the start of the line, repeat is also used in 'move (repeat("d",5)) to sFoo' if ( (strcmp(s, "repeat") == 0) && (IsFirstDataFlexWord(sc.currentPos-6, styler)) ) { newState = SCE_DF_SCOPEWORD; } } if (oldState == newState) { if (operators.InList(s)) { newState = SCE_DF_OPERATOR; } } if (oldState != newState) { sc.ChangeState(newState); } sc.SetState(SCE_DF_DEFAULT); } static void ColouriseDataFlexDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler) { // bool bSmartHighlighting = styler.GetPropertyInt("lexer.dataflex.smart.highlighting", 1) != 0; CharacterSet setWordStart(CharacterSet::setAlpha, "_$#@", 0x80, true); CharacterSet setWord(CharacterSet::setAlphaNum, "_$#@", 0x80, true); CharacterSet setNumber(CharacterSet::setDigits, ".-+eE"); CharacterSet setHexNumber(CharacterSet::setDigits, "abcdefABCDEF"); CharacterSet setOperator(CharacterSet::setNone, "*+-/<=>^"); Sci_Position curLine = styler.GetLine(startPos); int curLineState = curLine > 0 ? styler.GetLineState(curLine - 1) : 0; StyleContext sc(startPos, length, initStyle, styler); for (; sc.More(); sc.Forward()) { if (sc.atLineEnd) { // Update the line state, so it can be seen by next line curLine = styler.GetLine(sc.currentPos); styler.SetLineState(curLine, curLineState); } // Determine if the current state should terminate. switch (sc.state) { case SCE_DF_NUMBER: if (!setNumber.Contains(sc.ch) || (sc.ch == '.' && sc.chNext == '.')) { sc.SetState(SCE_DF_DEFAULT); } else if (sc.ch == '-' || sc.ch == '+') { if (sc.chPrev != 'E' && sc.chPrev != 'e') { sc.SetState(SCE_DF_DEFAULT); } } break; case SCE_DF_IDENTIFIER: if (!setWord.Contains(sc.ch)) { ClassifyDataFlexWord(keywordlists, sc, styler); } break; case SCE_DF_HEXNUMBER: if (!(setHexNumber.Contains(sc.ch) || sc.ch == 'I') ) { // in |CI$22a we also want to color the "I" sc.SetState(SCE_DF_DEFAULT); } break; case SCE_DF_METATAG: if (sc.atLineStart || sc.chPrev == '}') { sc.SetState(SCE_DF_DEFAULT); } break; case SCE_DF_PREPROCESSOR: if (sc.atLineStart || IsASpaceOrTab(sc.ch)) { sc.SetState(SCE_DF_DEFAULT); } break; case SCE_DF_IMAGE: if (sc.atLineStart && sc.Match("/*")) { sc.Forward(); // these characters are still part of the DF Image sc.ForwardSetState(SCE_DF_DEFAULT); } break; case SCE_DF_PREPROCESSOR2: // we don't have inline comments or preprocessor2 commands //if (sc.Match('*', ')')) { // sc.Forward(); // sc.ForwardSetState(SCE_DF_DEFAULT); //} break; case SCE_DF_COMMENTLINE: if (sc.atLineStart) { sc.SetState(SCE_DF_DEFAULT); } break; case SCE_DF_STRING: if (sc.atLineEnd) { sc.ChangeState(SCE_DF_STRINGEOL); } else if (sc.ch == '\'' && sc.chNext == '\'') { sc.Forward(); } else if (sc.ch == '\"' && sc.chNext == '\"') { sc.Forward(); } else if (sc.ch == '\'' || sc.ch == '\"') { if (sc.ch == '\'' && (curLineState & stateSingleQuoteOpen) ) { curLineState &= ~(stateSingleQuoteOpen); sc.ForwardSetState(SCE_DF_DEFAULT); } else if (sc.ch == '\"' && (curLineState & stateDoubleQuoteOpen) ) { curLineState &= ~(stateDoubleQuoteOpen); sc.ForwardSetState(SCE_DF_DEFAULT); } } break; case SCE_DF_STRINGEOL: if (sc.atLineStart) { sc.SetState(SCE_DF_DEFAULT); } break; case SCE_DF_SCOPEWORD: //if (!setHexNumber.Contains(sc.ch) && sc.ch != '$') { // sc.SetState(SCE_DF_DEFAULT); //} break; case SCE_DF_OPERATOR: // if (bSmartHighlighting && sc.chPrev == ';') { // curLineState &= ~(stateInProperty | stateInExport); // } sc.SetState(SCE_DF_DEFAULT); break; case SCE_DF_ICODE: if (sc.atLineStart || IsASpace(sc.ch) || isoperator(sc.ch)) { sc.SetState(SCE_DF_DEFAULT); } break; } // Determine if a new state should be entered. if (sc.state == SCE_DF_DEFAULT) { if (IsADigit(sc.ch)) { sc.SetState(SCE_DF_NUMBER); } else if (sc.Match('/', '/') || sc.Match("#REM")) { sc.SetState(SCE_DF_COMMENTLINE); } else if ((sc.ch == '#' && !sc.Match("#REM")) && IsFirstDataFlexWord(sc.currentPos, styler)) { sc.SetState(SCE_DF_PREPROCESSOR); // || (sc.ch == '|' && sc.chNext == 'C' && sc.GetRelativeCharacter(2) == 'I' && sc.GetRelativeCharacter(3) == '$') ) { } else if ((sc.ch == '$' && ((!setWord.Contains(sc.chPrev)) || sc.chPrev == 'I' ) ) || (sc.Match("|CI$")) ) { sc.SetState(SCE_DF_HEXNUMBER); // start with $ and previous character not in a..zA..Z0..9 excluding "I" OR start with |CI$ } else if (setWordStart.Contains(sc.ch)) { sc.SetState(SCE_DF_IDENTIFIER); } else if (sc.ch == '{') { sc.SetState(SCE_DF_METATAG); //} else if (sc.Match("(*$")) { // sc.SetState(SCE_DF_PREPROCESSOR2); } else if (sc.ch == '/' && setWord.Contains(sc.chNext) && sc.atLineStart) { sc.SetState(SCE_DF_IMAGE); // sc.Forward(); // Eat the * so it isn't used for the end of the comment } else if (sc.ch == '\'' || sc.ch == '\"') { if (sc.ch == '\'' && !(curLineState & stateDoubleQuoteOpen)) { curLineState |= stateSingleQuoteOpen; } else if (sc.ch == '\"' && !(curLineState & stateSingleQuoteOpen)) { curLineState |= stateDoubleQuoteOpen; } sc.SetState(SCE_DF_STRING); } else if (setOperator.Contains(sc.ch)) { sc.SetState(SCE_DF_OPERATOR); // } else if (curLineState & stateInICode) { // ICode start ! in a string followed by close string mark is not icode } else if ((sc.ch == '!') && !(sc.ch == '!' && ((sc.chNext == '\"') || (sc.ch == '\'')) )) { sc.SetState(SCE_DF_ICODE); } } } if (sc.state == SCE_DF_IDENTIFIER && setWord.Contains(sc.chPrev)) { ClassifyDataFlexWord(keywordlists, sc, styler); } sc.Complete(); } static bool IsStreamCommentStyle(int style) { return style == SCE_DF_IMAGE; } static bool IsCommentLine(Sci_Position line, Accessor &styler) { Sci_Position pos = styler.LineStart(line); Sci_Position eolPos = styler.LineStart(line + 1) - 1; for (Sci_Position i = pos; i < eolPos; i++) { char ch = styler[i]; char chNext = styler.SafeGetCharAt(i + 1); int style = styler.StyleAt(i); if (ch == '/' && chNext == '/' && style == SCE_DF_COMMENTLINE) { return true; } else if (!IsASpaceOrTab(ch)) { return false; } } return false; } static unsigned int GetFoldInPreprocessorLevelFlag(int lineFoldStateCurrent) { return lineFoldStateCurrent & stateFoldInPreprocessorLevelMask; } static void SetFoldInPreprocessorLevelFlag(int &lineFoldStateCurrent, unsigned int nestLevel) { lineFoldStateCurrent &= ~stateFoldInPreprocessorLevelMask; lineFoldStateCurrent |= nestLevel & stateFoldInPreprocessorLevelMask; } static int ClassifyDataFlexPreprocessorFoldPoint(int &levelCurrent, int &lineFoldStateCurrent, Sci_PositionU startPos, Accessor &styler) { CharacterSet setWord(CharacterSet::setAlpha); char s[100]; // Size of the longest possible keyword + one additional character + null GetForwardRangeLowered(startPos, setWord, styler, s, sizeof(s)); size_t iLen = _strnlen(s,sizeof(s)); size_t iWordSize = 0; unsigned int nestLevel = GetFoldInPreprocessorLevelFlag(lineFoldStateCurrent); if (strcmp(s, "command") == 0 || // The #if/#ifdef etcetera commands are not currently foldable as it is easy to write code that // breaks the collaps logic, so we keep things simple and not include that for now. strcmp(s, "header") == 0) { nestLevel++; SetFoldInPreprocessorLevelFlag(lineFoldStateCurrent, nestLevel); lineFoldStateCurrent |= stateFoldInPreprocessor; levelCurrent++; iWordSize = iLen; } else if (strcmp(s, "endcommand") == 0 || strcmp(s, "endheader") == 0) { nestLevel--; SetFoldInPreprocessorLevelFlag(lineFoldStateCurrent, nestLevel); if (nestLevel == 0) { lineFoldStateCurrent &= ~stateFoldInPreprocessor; } levelCurrent--; iWordSize = iLen; if (levelCurrent < SC_FOLDLEVELBASE) { levelCurrent = SC_FOLDLEVELBASE; } } return static_cast(iWordSize); } static void ClassifyDataFlexWordFoldPoint(int &levelCurrent, int &lineFoldStateCurrent, Sci_PositionU lastStart, Sci_PositionU currentPos, WordList *[], Accessor &styler) { char s[100]; // property fold.dataflex.compilerlist // Set to 1 for enabling the code folding feature in *.prn files bool foldPRN = styler.GetPropertyInt("fold.dataflex.compilerlist",0) != 0; GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s)); if (strcmp(s, "case") == 0) { lineFoldStateCurrent |= stateFoldInCaseStatement; } else if (strcmp(s, "begin") == 0) { levelCurrent++; } else if (strcmp(s, "for") == 0 || strcmp(s, "while") == 0 || strcmp(s, "repeat") == 0 || strcmp(s, "for_all") == 0 || strcmp(s, "struct") == 0 || strcmp(s, "type") == 0 || strcmp(s, "begin_row") == 0 || strcmp(s, "item_list") == 0 || strcmp(s, "begin_constraints") == 0 || strcmp(s, "begin_transaction") == 0 || strcmp(s, "enum_list") == 0 || strcmp(s, "class") == 0 || strcmp(s, "object") == 0 || strcmp(s, "cd_popup_object") == 0 || strcmp(s, "procedure") == 0 || strcmp(s, "procedure_section") == 0 || strcmp(s, "function") == 0 ) { if ((IsFirstDataFlexWord(lastStart, styler )) || foldPRN) { levelCurrent++; } } else if (strcmp(s, "end") == 0) { // end is not always the first keyword, for example "case end" levelCurrent--; if (levelCurrent < SC_FOLDLEVELBASE) { levelCurrent = SC_FOLDLEVELBASE; } } else if (strcmp(s, "loop") == 0 || strcmp(s, "until") == 0 || strcmp(s, "end_class") == 0 || strcmp(s, "end_object") == 0 || strcmp(s, "cd_end_object") == 0 || strcmp(s, "end_procedure") == 0 || strcmp(s, "end_function") == 0 || strcmp(s, "end_for_all") == 0 || strcmp(s, "end_struct") == 0 || strcmp(s, "end_type") == 0 || strcmp(s, "end_row") == 0 || strcmp(s, "end_item_list") == 0 || strcmp(s, "end_constraints") == 0 || strcmp(s, "end_transaction") == 0 || strcmp(s, "end_enum_list") == 0 ) { // lineFoldStateCurrent &= ~stateFoldInRecord; if ((IsFirstDataFlexWord(lastStart, styler )) || foldPRN) { levelCurrent--; if (levelCurrent < SC_FOLDLEVELBASE) { levelCurrent = SC_FOLDLEVELBASE; } } } } static void ClassifyDataFlexMetaDataFoldPoint(int &levelCurrent, Sci_PositionU lastStart, Sci_PositionU currentPos, WordList *[], Accessor &styler) { char s[100]; GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s)); if (strcmp(s, "#beginsection") == 0) { levelCurrent++; } else if (strcmp(s, "#endsection") == 0) { levelCurrent--; if (levelCurrent < SC_FOLDLEVELBASE) { levelCurrent = SC_FOLDLEVELBASE; } } } static void FoldDataFlexDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler) { bool foldComment = styler.GetPropertyInt("fold.comment") != 0; bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0; bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; Sci_PositionU endPos = startPos + length; int visibleChars = 0; Sci_Position lineCurrent = styler.GetLine(startPos); int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; int levelCurrent = levelPrev; int lineFoldStateCurrent = lineCurrent > 0 ? styler.GetLineState(lineCurrent - 1) & stateFoldMaskAll : 0; char chNext = styler[startPos]; int styleNext = styler.StyleAt(startPos); int style = initStyle; int iWordSize; Sci_Position lastStart = 0; CharacterSet setWord(CharacterSet::setAlphaNum, "_$#@", 0x80, true); for (Sci_PositionU i = startPos; i < endPos; i++) { char ch = chNext; chNext = styler.SafeGetCharAt(i + 1); int stylePrev = style; style = styleNext; styleNext = styler.StyleAt(i + 1); bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); if (foldComment && IsStreamCommentStyle(style)) { if (!IsStreamCommentStyle(stylePrev)) { levelCurrent++; } else if (!IsStreamCommentStyle(styleNext)) { levelCurrent--; } } if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) { if (!IsCommentLine(lineCurrent - 1, styler) && IsCommentLine(lineCurrent + 1, styler)) levelCurrent++; else if (IsCommentLine(lineCurrent - 1, styler) && !IsCommentLine(lineCurrent+1, styler)) levelCurrent--; } if (foldPreprocessor) { if (style == SCE_DF_PREPROCESSOR) { iWordSize = ClassifyDataFlexPreprocessorFoldPoint(levelCurrent, lineFoldStateCurrent, i + 1, styler); //} else if (style == SCE_DF_PREPROCESSOR2 && ch == '(' && chNext == '*' // && styler.SafeGetCharAt(i + 2) == '$') { // ClassifyDataFlexPreprocessorFoldPoint(levelCurrent, lineFoldStateCurrent, i + 3, styler); i = i + iWordSize; } } if (stylePrev != SCE_DF_SCOPEWORD && style == SCE_DF_SCOPEWORD) { // Store last word start point. lastStart = i; } if (stylePrev == SCE_DF_SCOPEWORD) { if(setWord.Contains(ch) && !setWord.Contains(chNext)) { ClassifyDataFlexWordFoldPoint(levelCurrent, lineFoldStateCurrent, lastStart, i, keywordlists, styler); } } if (stylePrev == SCE_DF_METATAG && ch == '#') { // Store last word start point. lastStart = i; } if (stylePrev == SCE_DF_METATAG) { if(setWord.Contains(ch) && !setWord.Contains(chNext)) { ClassifyDataFlexMetaDataFoldPoint(levelCurrent, lastStart, i, keywordlists, styler); } } if (!IsASpace(ch)) visibleChars++; if (atEOL) { int lev = levelPrev; if (visibleChars == 0 && foldCompact) lev |= SC_FOLDLEVELWHITEFLAG; if ((levelCurrent > levelPrev) && (visibleChars > 0)) lev |= SC_FOLDLEVELHEADERFLAG; if (lev != styler.LevelAt(lineCurrent)) { styler.SetLevel(lineCurrent, lev); } int newLineState = (styler.GetLineState(lineCurrent) & ~stateFoldMaskAll) | lineFoldStateCurrent; styler.SetLineState(lineCurrent, newLineState); lineCurrent++; levelPrev = levelCurrent; visibleChars = 0; } } // If we didn't reach the EOL in previous loop, store line level and whitespace information. // The rest will be filled in later... int lev = levelPrev; if (visibleChars == 0 && foldCompact) lev |= SC_FOLDLEVELWHITEFLAG; styler.SetLevel(lineCurrent, lev); } static const char * const dataflexWordListDesc[] = { "Keywords", "Scope open", "Scope close", "Operators", 0 }; LexerModule lmDataflex(SCLEX_DATAFLEX, ColouriseDataFlexDoc, "dataflex", FoldDataFlexDoc, dataflexWordListDesc);