// Scintilla Lexer for EDIFACT // Written by Iain Clarke, IMCSoft & Inobiz AB. // EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html // and more readably here: https://en.wikipedia.org/wiki/EDIFACT // This code is subject to the same license terms as the rest of the scintilla project: // The License.txt file describes the conditions under which this software may be distributed. // // Header order must match order in scripts/HeaderOrder.txt #include #include #include #include #include "ILexer.h" #include "Scintilla.h" #include "SciLexer.h" #include "LexAccessor.h" #include "LexerModule.h" #ifdef SCI_NAMESPACE using namespace Scintilla; #endif class LexerEDIFACT : public ILexer { public: LexerEDIFACT(); virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer static ILexer *Factory() { return new LexerEDIFACT; } int SCI_METHOD Version() const override { return lvOriginal; } void SCI_METHOD Release() override { delete this; } const char * SCI_METHOD PropertyNames() override { return "fold"; } int SCI_METHOD PropertyType(const char *) override { return SC_TYPE_BOOLEAN; // Only one property! } const char * SCI_METHOD DescribeProperty(const char *name) override { if (strcmp(name, "fold")) return NULL; return "Whether to apply folding to document or not"; } Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override { if (strcmp(key, "fold")) return -1; m_bFold = strcmp(val, "0") ? true : false; return 0; } const char * SCI_METHOD DescribeWordListSets() override { return NULL; } Sci_Position SCI_METHOD WordListSet(int, const char *) override { return -1; } void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) override; void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) override; void * SCI_METHOD PrivateCall(int, void *) override { return NULL; } protected: Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength); Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const; Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const; int DetectSegmentHeader(char SegmentHeader[3]) const; bool m_bFold; char m_chComponent; char m_chData; char m_chDecimal; char m_chRelease; char m_chSegment; }; LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact"); /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// LexerEDIFACT::LexerEDIFACT() { m_bFold = false; m_chComponent = ':'; m_chData = '+'; m_chDecimal = '.'; m_chRelease = '?'; m_chSegment = '\''; } void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess) { Sci_PositionU posFinish = startPos + lengthDoc; InitialiseFromUNA(pAccess, posFinish); // Look backwards for a ' or a document beginning Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos); // And jump past the ' if this was not the beginning of the document if (posCurrent != 0) posCurrent++; // Style buffer, so we're not issuing loads of notifications LexAccessor styler (pAccess); pAccess->StartStyling(posCurrent, '\377'); styler.StartSegment(posCurrent); Sci_Position posSegmentStart = -1; while ((posCurrent < posFinish) && (posSegmentStart == -1)) { posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish); // Mark whitespace as default styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT); if (posCurrent >= posFinish) break; // Does is start with 3 charaters? ie, UNH char SegmentHeader[4] = { 0 }; pAccess->GetCharRange(SegmentHeader, posCurrent, 3); int SegmentStyle = DetectSegmentHeader(SegmentHeader); if (SegmentStyle == SCE_EDI_BADSEGMENT) break; if (SegmentStyle == SCE_EDI_UNA) { posCurrent += 9; styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA continue; } posSegmentStart = posCurrent; posCurrent += 3; styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc // Colour in the rest of the segment for (char c; posCurrent < posFinish; posCurrent++) { pAccess->GetCharRange(&c, posCurrent, 1); if (c == m_chRelease) // ? escape character, check first, in case of ?' posCurrent++; else if (c == m_chSegment) // ' { // Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad. Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart); Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent); if (lineSegmentStart == lineSegmentEnd) styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND); else styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT); posSegmentStart = -1; posCurrent++; break; } else if (c == m_chComponent) // : styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE); else if (c == m_chData) // + styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT); else styler.ColourTo(posCurrent, SCE_EDI_DEFAULT); } } styler.Flush(); if (posSegmentStart == -1) return; pAccess->StartStyling(posSegmentStart, -1); pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT); } void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess) { if (!m_bFold) return; // Fold at UNx lines. ie, UNx segments = 0, other segments = 1. // There's no sub folding, so we can be quite simple. Sci_Position endPos = startPos + lengthDoc; char SegmentHeader[4] = { 0 }; int iIndentPrevious = 0; Sci_Position lineLast = pAccess->LineFromPosition(endPos); for (Sci_Position lineCurrent = pAccess->LineFromPosition(startPos); lineCurrent <= lineLast; lineCurrent++) { Sci_Position posLineStart = pAccess->LineStart(lineCurrent); posLineStart = ForwardPastWhitespace(pAccess, posLineStart, endPos); Sci_Position lineDataStart = pAccess->LineFromPosition(posLineStart); // Fill in whitespace lines? for (; lineCurrent < lineDataStart; lineCurrent++) pAccess->SetLevel(lineCurrent, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | iIndentPrevious); pAccess->GetCharRange(SegmentHeader, posLineStart, 3); //if (DetectSegmentHeader(SegmentHeader) == SCE_EDI_BADSEGMENT) // Abort if this is not a proper segment header int level = 0; if (memcmp(SegmentHeader, "UNH", 3) == 0) // UNH starts blocks level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; // Check for UNA,B and Z. All others are inside messages else if (!memcmp(SegmentHeader, "UNA", 3) || !memcmp(SegmentHeader, "UNB", 3) || !memcmp(SegmentHeader, "UNZ", 3)) level = SC_FOLDLEVELBASE; else level = SC_FOLDLEVELBASE | 1; pAccess->SetLevel(lineCurrent, level); iIndentPrevious = level & SC_FOLDLEVELNUMBERMASK; } } Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength) { MaxLength -= 9; // drop 9 chars, to give us room for UNA:+.? ' Sci_PositionU startPos = 0; startPos += ForwardPastWhitespace(pAccess, 0, MaxLength); if (startPos < MaxLength) { char bufUNA[9]; pAccess->GetCharRange(bufUNA, startPos, 9); // Check it's UNA segment if (!memcmp(bufUNA, "UNA", 3)) { m_chComponent = bufUNA[3]; m_chData = bufUNA[4]; m_chDecimal = bufUNA[5]; m_chRelease = bufUNA[6]; // bufUNA [7] should be space - reserved. m_chSegment = bufUNA[8]; return 0; // success! } } // We failed to find a UNA, so drop to defaults m_chComponent = ':'; m_chData = '+'; m_chDecimal = '.'; m_chRelease = '?'; m_chSegment = '\''; return -1; } Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const { char c; while (startPos < MaxLength) { pAccess->GetCharRange(&c, startPos, 1); switch (c) { case '\t': case '\r': case '\n': case ' ': break; default: return startPos; } startPos++; } return MaxLength; } int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const { if ( SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' || SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' || SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z') return SCE_EDI_BADSEGMENT; if (memcmp(SegmentHeader, "UNA", 3) == 0) return SCE_EDI_UNA; if (memcmp(SegmentHeader, "UNH", 3) == 0) return SCE_EDI_UNH; return SCE_EDI_SEGMENTSTART; } // Look backwards for a ' or a document beginning Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const { for (char c; startPos > 0; startPos--) { pAccess->GetCharRange(&c, startPos, 1); if (c == m_chSegment) return startPos; } // We didn't find a ', so just go with the beginning return 0; }