From 5dba234be2bc12a3eb9193b0c6d0522e20b6686e Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Fri, 23 Mar 2012 16:35:54 +0000 Subject: [PATCH] Minor doc fixes for main description of std.regex These include spelling and typo fixes, more natural English phrasing and flow, and some whitespace alignment. Add missing 'a' or 'the' where necessary. Add 'but no more than m times' to the description of the {n,m} quantifier, to differentiate it from the {n,} description. Add a 'Slicing' heading to separate the Unicode and returning slices sections. --- std/regex.d | 90 +++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/std/regex.d b/std/regex.d index 10a3718f3..7fb4f7863 100644 --- a/std/regex.d +++ b/std/regex.d @@ -1,6 +1,6 @@ //Written in the D programming language /++ - $(LUCKY Regular expressions) are commonly used method of pattern matching + $(LUCKY Regular expressions) are a commonly used method of pattern matching on strings, with $(I regex) being a catchy word for a pattern in this domain specific language. Typical problems usually solved by regular expressions include validation of user input and ubiquitous find & replace @@ -18,7 +18,7 @@ foreach(line; stdin.byLine) { //match returns a range that can be iterated - //to get all of subsequent matches + //to get all subsequent matches foreach(c; match(line, r)) writeln(c.hit); } @@ -30,8 +30,8 @@ //works just like normal regex: auto m2 = match("foo/bar", ctr); //first match found here if any - assert(m2); // be sure to check if there is a match, before examining contents! - assert(m2.captures[1] == "bar");//captures is a range of submatches, 0 - full match + assert(m2); // be sure to check if there is a match before examining contents! + assert(m2.captures[1] == "bar"); //captures is a range of submatches, 0 - full match ... 
@@ -42,18 +42,18 @@ --- - The general usage guideline is keeping regex complexity on the side of simplicity, + The general usage guideline is to keep regex complexity on the side of simplicity, as its capabilities reside in purely character-level manipulation, - and as such are ill suited for tasks involving higher level invariants - like matching an integer number $(U bounded) in [a,b] interval. + and as such are ill-suited for tasks involving higher level invariants + like matching an integer number $(U bounded) in an [a,b] interval. Checks of this sort of are better addressed by additional post-processing. - The basic syntax shouldn't surprize experienced users of regular expressions. - Thankfully, nowdays the web is bustling with resources to help newcomers, and a good - $(WEB www.regular-expressions.info, reference with tutorial ) on regular expressions - could be found. + The basic syntax shouldn't surprise experienced users of regular expressions. + Thankfully, nowadays the web is bustling with resources to help newcomers, and a good + $(WEB www.regular-expressions.info, reference with tutorial) on regular expressions + can be found. - This library uses ECMAScript syntax flavor with the following extensions: + This library uses an ECMAScript syntax flavor with the following extensions: $(UL $(LI Named subexpressions, with Python syntax. ) $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g Alphabetic, White_Space, Hex_Digit etc.) @@ -62,16 +62,16 @@ $(REG_START Pattern syntax ) $(I std.regex operates on codepoint level, - 'character' in this table denotes single unicode codepoint.) + 'character' in this table denotes a single unicode codepoint.) $(REG_TABLE $(REG_TITLE Pattern element, Semantics ) $(REG_TITLE Atoms, Match single characters ) $(REG_ROW any character except [|*+?(), Matches the character itself. ) $(REG_ROW ., In single line mode matches any charcter. Otherwise it matches any character except '\n' and '\r'. 
) - $(REG_ROW [class], Matches single character + $(REG_ROW [class], Matches a single character that belongs to this character class. ) - $(REG_ROW [^class], Matches single character that + $(REG_ROW [^class], Matches a single character that does $(U not) belong to this character class.) $(REG_ROW \cC, Matches the control character corresponding to letter C) $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. ) @@ -83,23 +83,23 @@ $(REG_ROW \t, Matches a tab character. ) $(REG_ROW \v, Matches a vertical tab character. ) $(REG_ROW \d, Matches any unicode digit. ) - $(REG_ROW \D, Matches any character but unicode digit. ) + $(REG_ROW \D, Matches any character but unicode digits. ) $(REG_ROW \w, Matches any word character (note: this includes numbers).) $(REG_ROW \W, Matches any non-word character.) $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.) - $(REG_ROW \S, Matches any character but these recognized as $(I \s ). ) + $(REG_ROW \S, Matches any character but those recognized as $(I \s ). ) $(REG_ROW \\, Matches \ character. ) $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. ) - $(REG_ROW \p{PropertyName}, Matches character that belongs - to unicode PropertyName set. - Single letter abreviations could be used without surrounding {,}. ) - $(REG_ROW \P{PropertyName}, Matches character that does not belong - to unicode PropertyName set. - Single letter abreviations could be used without surrounding {,}. ) + $(REG_ROW \p{PropertyName}, Matches a character that belongs + to the unicode PropertyName set. + Single letter abbreviations can be used without surrounding {,}. ) + $(REG_ROW \P{PropertyName}, Matches a character that does not belong + to the unicode PropertyName set. + Single letter abbreviations can be used without surrounding {,}. ) $(REG_ROW \p{InBasicLatin}, Matches any character that is part of - BasicLatin unicode $(U block).) + the BasicLatin unicode $(U block).) 
$(REG_ROW \P{InBasicLatin}, Matches any character except ones in - BasicLatin unicode $(U block).) + the BasicLatin unicode $(U block).) $(REG_ROW \p{Cyrilic}, Matches any character that is part of Cyrilic $(U script).) $(REG_ROW \P{Cyrilic}, Matches any character except ones in @@ -113,57 +113,57 @@ Greedy version - tries as many times as possible.) $(REG_ROW +?, Matches previous character/subexpression 1 or more times. Lazy version - stops as early as possible.) - $(REG_ROW {n}, Matches previous character/subexpression n exactly times. ) + $(REG_ROW {n}, Matches previous character/subexpression exactly n times. ) $(REG_ROW {n,}, Matches previous character/subexpression n times or more. Greedy version - tries as many times as possible. ) $(REG_ROW {n,}?, Matches previous character/subexpression n times or more. Lazy version - stops as early as possible.) $(REG_ROW {n,m}, Matches previous character/subexpression n to m times. - Greedy version - tries as many times as possible. ) + Greedy version - tries as many times as possible, but no more than m times. ) $(REG_ROW {n,m}?, Matches previous character/subexpression n to m times. Lazy version - stops as early as possible, but no less then n times.) $(REG_TITLE Other, Subexpressions & alternations ) $(REG_ROW (regex), Matches subexpression regex, - saving matched portion of text for later retrival. ) + saving matched portion of text for later retrieval. ) $(REG_ROW (?:regex), Matches subexpression regex, $(U not) saving matched portion of text. Useful to speed up matching. ) - $(REG_ROW A|B, Matches subexpression A, failing that matches B. ) + $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. ) $(REG_ROW (?P<name>regex), Matches named subexpression regex labeling it with name 'name'. - When refering to matched portion of text, + When referring to a matched portion of text, names work like aliases in addition to direct numbers. 
) - $(REG_TITLE Assertions, Match position rather then character ) + $(REG_TITLE Assertions, Match position rather than character ) $(REG_ROW ^, Matches at the begining of input or line (in multiline mode).) $(REG_ROW $, Matches at the end of input or line (in multiline mode). ) $(REG_ROW \b, Matches at word boundary. ) $(REG_ROW \B, Matches when $(U not) at word boundary. ) $(REG_ROW (?=regex), Zero-width lookahead assertion. Matches at a point where the subexpression - regex could be matched starting from current position. + regex could be matched starting from the current position. ) $(REG_ROW (?!regex), Zero-width negative lookahead assertion. Matches at a point where the subexpression - regex could $(U not ) be matched starting from current position. + regex could $(U not) be matched starting from the current position. ) $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point where the subexpression regex could be matched ending - at current position (matching goes backwards). + at the current position (matching goes backwards). ) $(REG_ROW (?