mirror of
git://git.gnu.org.ua/wordsplit.git
synced 2025-04-26 00:29:54 +03:00
Finish wordsplit docs, improve tests
This commit is contained in:
parent
9bebcfbc1c
commit
943d725e7f
5 changed files with 472 additions and 90 deletions
298
doc/wordsplit.3
298
doc/wordsplit.3
|
@ -14,7 +14,7 @@
|
|||
.\" You should have received a copy of the GNU General Public License
|
||||
.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>.
|
||||
.\"
|
||||
.TH WORDSPLIT 3 "October 28, 2014" "GRECS" "Grecs User Reference"
|
||||
.TH WORDSPLIT 3 "October 30, 2014" "GRECS" "Grecs User Reference"
|
||||
.SH NAME
|
||||
wordsplit \- split string into words
|
||||
.SH SYNOPSIS
|
||||
|
@ -39,8 +39,7 @@ wordsplit \- split string into words
|
|||
\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR
|
||||
.SH DESCRIPTION
|
||||
The function \fBwordsplit\fR splits the string \fIs\fR into words
|
||||
using a set of rules governed by \fIflags\fR and stores the result
|
||||
in the memory location pointed to by \fIws\fR. Depending on
|
||||
using a set of rules governed by \fIflags\fR. Depending on
|
||||
\fIflags\fR, the function performs the following: whitespace trimming,
|
||||
tilde expansion, variable expansion, quote removal, command
|
||||
substitution, and path expansion. On success, the function returns 0
|
||||
|
@ -96,11 +95,39 @@ not try to alter or deallocate it.
|
|||
The function
|
||||
.B wordsplit_clearerr
|
||||
clears the error condition associated with \fIws\fR.
|
||||
.SH INCREMENTAL MODE
|
||||
In incremental mode \fBwordsplit\fR parses one word per invocation.
|
||||
It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when it
|
||||
has processed the entire input string.
|
||||
.PP
|
||||
This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in
|
||||
the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must
|
||||
have \fBNULL\fR as first argument. Each successful
|
||||
call will return exactly one word in \fBws.ws_wordv[0]\fR.
|
||||
.PP
|
||||
An example usage:
|
||||
.PP
|
||||
.EX
|
||||
wordsplit_t ws;
|
||||
int rc;
|
||||
flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL;
|
||||
|
||||
for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK;
|
||||
rc = wordsplit(NULL, &ws, flags)) {
|
||||
process(ws.ws_wordv[0]);
|
||||
}
|
||||
|
||||
if (rc != WRDSE_NOINPUT)
|
||||
wordsplit_perror(&ws);
|
||||
|
||||
wordsplit_free(&ws);
|
||||
.EE
|
||||
.SH EXPANSION
|
||||
The number of expansions performed on the input is controlled by
|
||||
appropriate bits set in the \fIflags\fR argument. Whatever expansions
|
||||
are enabled, they are always run in the same order as described in this
|
||||
section.
|
||||
Expansion is performed on the input after it has been split into
|
||||
words. There are several kinds of expansion; which of them are
|
||||
performed is controlled by appropriate bits set in the \fIflags\fR
|
||||
argument. Whatever expansion kinds are enabled, they are always run
|
||||
in the same order as described in this section.
|
||||
.SS Whitespace trimming
|
||||
Whitespace trimming removes any leading and trailing whitespace from
|
||||
the initial word array. It is enabled by the
|
||||
|
@ -206,8 +233,153 @@ Otherwise, the value of \fIvariable\fR is substituted.
|
|||
If \fIvariable\fR is null or unset, nothing is substituted, otherwise the
|
||||
expansion of \fIword\fR is substituted.
|
||||
.SS Quote removal
|
||||
Quote removal translates unquoted escape sequences into corresponding bytes.
|
||||
An escape sequence is a backslash followed by one or more characters. By
|
||||
default, each sequence \fB\\\fIC\fR appearing in unquoted words is
|
||||
replaced with the character \fIC\fR. In doubly-quoted strings, two
|
||||
backslash sequences are recognized: \fB\\\\\fR translates to a single
|
||||
backslash, and \fB\\\(dq\fR translates to a double-quote.
|
||||
.PP
|
||||
Two flags are provided to modify this behavior. If
|
||||
.I WRDSF_CESCAPES
|
||||
flag is set, the following escape sequences are recognized:
|
||||
.sp
|
||||
.nf
|
||||
.ta 8n 18n 42n
|
||||
.ul
|
||||
Sequence Expansion ASCII
|
||||
\fB\\\\\fR \fB\\\fR 134
|
||||
\fB\\\(dq\fR \fB\(dq\fR 042
|
||||
\fB\\a\fR audible bell 007
|
||||
\fB\\b\fR backspace 010
|
||||
\fB\\f\fR form-feed 014
|
||||
\fB\\n\fR new line 012
|
||||
\fB\\r\fR carriage return 015
|
||||
\fB\\t\fR horizontal tabulation 011
|
||||
\fB\\v\fR vertical tabulation 013
|
||||
.fi
|
||||
.sp
|
||||
The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands
|
||||
for a two-digit hex number is replaced with ASCII character \fINN\fR.
|
||||
The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit
|
||||
octal number is replaced with ASCII character whose code is \fINNN\fR.
|
||||
.PP
|
||||
The \fBWRDSF_ESCAPE\fR flag allows the caller to customize escape
|
||||
sequences. If it is set, the \fBws_escape\fR member must be
|
||||
initialized. This member provides escape tables for unquoted words
|
||||
(\fBws_escape[0]\fR) and quoted strings (\fBws_escape[1]\fR). Each
|
||||
table is a string consisting of an even number of characters. In each
|
||||
pair of characters, the first one is a character that can appear after
|
||||
backslash, and the following one is its translation. For example, the
|
||||
above table of C escapes is represented as
|
||||
\fB\(dqa\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR.
|
||||
.PP
|
||||
It is valid to initialize \fBws_escape\fR elements to zero. In this
|
||||
case, no backslash translation occurs.
|
||||
.PP
|
||||
The handling of octal and hex escapes is controlled by the following
|
||||
bits in \fBws_options\fR:
|
||||
.TP
|
||||
.B WRDSO_BSKEEP_WORD
|
||||
When an unrecognized escape sequence is encountered in a word,
|
||||
preserve it on output. If that bit is not set, the backslash is
|
||||
removed from such sequences.
|
||||
.TP
|
||||
.B WRDSO_OESC_WORD
|
||||
Handle octal escapes in words.
|
||||
.TP
|
||||
.B WRDSO_XESC_WORD
|
||||
Handle hex escapes in words.
|
||||
.TP
|
||||
.B WRDSO_BSKEEP_QUOTE
|
||||
When an unrecognized escape sequence is encountered in a doubly-quoted
|
||||
string, preserve it on output. If that bit is not set, the backslash is
|
||||
removed from such sequences.
|
||||
.TP
|
||||
.B WRDSO_OESC_QUOTE
|
||||
Handle octal escapes in doubly-quoted strings.
|
||||
.TP
|
||||
.B WRDSO_XESC_QUOTE
|
||||
Handle hex escapes in doubly-quoted strings.
|
||||
.SS Command substitution
|
||||
.SS Path expansion
|
||||
During \fIcommand substitution\fR, each word is scanned for commands.
|
||||
Each command found is executed and replaced by the output it creates.
|
||||
.PP
|
||||
The syntax is:
|
||||
.PP
|
||||
.RS +4
|
||||
.BI $( command )
|
||||
.RE
|
||||
.PP
|
||||
Command substitutions may be nested.
|
||||
.PP
|
||||
Unless the substitution appears within double quotes, word splitting and
|
||||
pathname expansion are performed on its result.
|
||||
.PP
|
||||
To enable command substitution, the caller must initialize the
|
||||
.I ws_command
|
||||
member with the address of the substitution function and make sure the
|
||||
.B WRDSF_NOCMD
|
||||
flag is not set.
|
||||
.PP
|
||||
The substitution function should be defined as follows:
|
||||
.PP
|
||||
.RS +4
|
||||
\fBint \fIcommand\fB\
|
||||
(char **\fIret\fB,\
|
||||
const char *\fIcmd\fB,\
|
||||
size_t \fIlen,\fB\
|
||||
char **\fIargv\fB,\
|
||||
void *\fIclos\fB);\fR
|
||||
.RE
|
||||
.PP
|
||||
First \fIlen\fR bytes of \fIcmd\fR contain the command invocation as
|
||||
it appeared between
|
||||
.BR $( and ),
|
||||
with all expansions performed. If the
|
||||
.I WRDSO_ARGV
|
||||
option is set, the parameter \fIargv\fR contains the command line split into
|
||||
words using the same settings as the input \fIws\fR structure.
|
||||
Otherwise, \fIargv\fR is \fBNULL\fR.
|
||||
.PP
|
||||
The \fIclos\fR parameter supplies user-specific data, passed in the
|
||||
\fIws_closure\fR member.
|
||||
.PP
|
||||
On success, the function stores a pointer to the
|
||||
output string in the memory location pointed to by \fIret\fR and
|
||||
returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the
|
||||
error codes described in the section
|
||||
.BR "ERROR CODES" .
|
||||
If
|
||||
.B WRDSE_USERERR
|
||||
is returned, a pointer to the error description string must be stored in
|
||||
.BR *ret .
|
||||
.PP
|
||||
When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the
|
||||
data stored in \fB*ret\fR must be allocated using
|
||||
.BR malloc (3).
|
||||
.SS Pathname expansion
|
||||
Pathname expansion is performed if the \fBWRDSF_PATHEXPAND\fR flag is
|
||||
set. Each unquoted word is scanned for characters
|
||||
.BR * , ? ", and " [ .
|
||||
If one of these appears, the word is considered a \fIpattern\fR (in
|
||||
the sense of
|
||||
.BR glob (3))
|
||||
and is replaced with an alphabetically sorted list of file names matching the
|
||||
pattern.
|
||||
.PP
|
||||
If no matches are found for a word
|
||||
and the \fIws_options\fR member has the
|
||||
.B WRDSO_NULLGLOB
|
||||
bit set, the word is removed.
|
||||
.PP
|
||||
If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output
|
||||
for each such word using
|
||||
.IR ws_error .
|
||||
.PP
|
||||
When matching a pattern, the dot at the start of a name or immediately
|
||||
following a slash must be matched explicitly, unless
|
||||
the \fBWRDSO_DOTGLOB\fR option is set.
|
||||
.SH WORDSPLIT_T STRUCTURE
|
||||
The data type \fBwordsplit_t\fR has three members that contain
|
||||
output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
|
||||
|
@ -264,8 +436,15 @@ If initialized on input, the
|
|||
.B WRDSF_COMMENT
|
||||
flag must be set. By default, its value is \fB\(dq#\(dq\fR.
|
||||
.TP
|
||||
.BI "const char *" ws_escape
|
||||
Characters to be escaped with backslash. The
|
||||
.BI "const char *" ws_escape [2]
|
||||
Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted
|
||||
strings (\fBws_escape[1]\fR). These are used to translate escape
|
||||
sequences (\fB\\\fIC\fR) into characters. Each table is a string
|
||||
consisting of an even number of characters. In each pair of characters,
|
||||
the first one is a character that can appear after backslash, and the
|
||||
following one is its representation. For example, the string
|
||||
\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into horizontal
|
||||
tabulation character and \fB\\n\fR into newline. The
|
||||
.B WRDSF_ESCAPE
|
||||
flag must be set if this member is initialized.
|
||||
.TP
|
||||
|
@ -367,7 +546,7 @@ flag must be set.
|
|||
const char *cmd,\
|
||||
size_t len,\
|
||||
char **argv,\
|
||||
void *clos)
|
||||
void *clos)\fR
|
||||
Pointer to the function that performs command substitution. It treats
|
||||
the first \fIlen\fR bytes of the string \fIcmd\fR as a command
|
||||
(whatever it means for the caller) and attempts to execute it. On
|
||||
|
@ -376,7 +555,7 @@ in the memory location pointed to by \fBret\fR and \fB0\fR is
|
|||
returned. On error,
|
||||
the function must return one of the error codes described in the section
|
||||
.BR "ERROR CODES" .
|
||||
If \fIws_getvar\fR returns
|
||||
If \fIws_command\fR returns
|
||||
.BR WRDSE_USERERR ,
|
||||
it must store the pointer to the error description string in
|
||||
.BR *ret .
|
||||
|
@ -555,7 +734,102 @@ for details.
|
|||
The
|
||||
.I ws_options
|
||||
member is initialized.
|
||||
.SH OPTIONS
|
||||
The
|
||||
.I ws_options
|
||||
member is consulted if the
|
||||
.B WRDSF_OPTIONS
|
||||
flag is set. It contains a bitwise \fBOR\fR of one or more of the
|
||||
following options:
|
||||
.TP
|
||||
.B WRDSO_NULLGLOB
|
||||
Remove the words that produce empty string after pathname expansion.
|
||||
.TP
|
||||
.B WRDSO_FAILGLOB
|
||||
Output error message if pathname expansion produces empty string.
|
||||
.TP
|
||||
.B WRDSO_DOTGLOB
|
||||
During pathname expansion allow a leading period to be matched by
|
||||
metacharacters.
|
||||
.TP
|
||||
.B WRDSO_ARGV
|
||||
Split command invocation into words and pass the result to the
|
||||
\fIws_command\fR function in \fIargv\fR parameter.
|
||||
.PP
|
||||
.TP
|
||||
.B WRDSO_BSKEEP_WORD
|
||||
Quote removal: when an unrecognized escape sequence is encountered in a word,
|
||||
preserve it on output. If that bit is not set, the backslash is
|
||||
removed from such sequences.
|
||||
.TP
|
||||
.B WRDSO_OESC_WORD
|
||||
Quote removal: handle octal escapes in words.
|
||||
.TP
|
||||
.B WRDSO_XESC_WORD
|
||||
Quote removal: handle hex escapes in words.
|
||||
.TP
|
||||
.B WRDSO_BSKEEP_QUOTE
|
||||
Quote removal: when an unrecognized escape sequence is encountered in
|
||||
a doubly-quoted string, preserve it on output. If that bit is not
|
||||
set, the backslash is removed from such sequences.
|
||||
.TP
|
||||
.B WRDSO_OESC_QUOTE
|
||||
Quote removal: handle octal escapes in doubly-quoted strings.
|
||||
.TP
|
||||
.B WRDSO_XESC_QUOTE
|
||||
Quote removal: handle hex escapes in doubly-quoted strings.
|
||||
.SH "ERROR CODES"
|
||||
.TP
|
||||
.BR WRDSE_OK ", " WRDSE_EOF
|
||||
Successful return.
|
||||
.TP
|
||||
.B WRDSE_QUOTE
|
||||
Missing closing quote. The \fIws_endp\fR points to the position in
|
||||
the input string where the error occurred.
|
||||
.TP
|
||||
.B WRDSE_NOSPACE
|
||||
Memory exhausted.
|
||||
.TP
|
||||
.B WRDSE_USAGE
|
||||
Invalid wordsplit usage.
|
||||
.TP
|
||||
.B WRDSE_CBRACE
|
||||
Unbalanced curly brace.
|
||||
.TP
|
||||
.B WRDSE_UNDEF
|
||||
Undefined variable. This error is returned only if the
|
||||
\fBWRDSF_UNDEF\fR flag is set.
|
||||
.TP
|
||||
.B WRDSE_NOINPUT
|
||||
Input exhausted. This is not actually an error. This code is returned
|
||||
if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental
|
||||
mode and encounters end of input string. See the section
|
||||
.BR "INCREMENTAL MODE" .
|
||||
.TP
|
||||
.B WRDSE_PAREN
|
||||
Unbalanced parenthesis.
|
||||
.TP
|
||||
.B WRDSE_GLOBERR
|
||||
An error occurred during pattern matching.
|
||||
.TP
|
||||
.B WRDSE_USERERR
|
||||
User-defined error. Normally it is returned by \fBws_getvar\fR or
|
||||
\fBws_command\fR. Use the function
|
||||
.B wordsplit_strerror
|
||||
to get textual description of the error.
|
||||
.SH "RETURN VALUE"
|
||||
Both
|
||||
.B wordsplit
|
||||
and
|
||||
.B wordsplit_len
|
||||
return \fB0\fR on success, and a non-zero error code on
|
||||
error (see the section
|
||||
.BR "ERROR CODES" ).
|
||||
.PP
|
||||
.B wordsplit_strerror
|
||||
returns a pointer to the constant string describing the last error
|
||||
condition that occurred in
|
||||
.IR ws .
|
||||
.SH EXAMPLE
|
||||
.SH "SEE ALSO"
|
||||
.SH AUTHORS
|
||||
|
|
148
src/wordsplit.c
148
src/wordsplit.c
|
@ -176,6 +176,8 @@ wordsplit_init0 (struct wordsplit *wsp)
|
|||
wsp->ws_head = wsp->ws_tail = NULL;
|
||||
}
|
||||
|
||||
char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
|
||||
|
||||
static int
|
||||
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
|
||||
int flags)
|
||||
|
@ -234,6 +236,30 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
|
|||
|
||||
if (!(wsp->ws_flags & WRDSF_OPTIONS))
|
||||
wsp->ws_options = 0;
|
||||
|
||||
if (wsp->ws_flags & WRDSF_ESCAPE)
|
||||
{
|
||||
if (!wsp->ws_escape[0])
|
||||
wsp->ws_escape[0] = "";
|
||||
if (!wsp->ws_escape[1])
|
||||
wsp->ws_escape[1] = "";
|
||||
}
|
||||
else
|
||||
{
|
||||
if (wsp->ws_flags & WRDSF_CESCAPES)
|
||||
{
|
||||
wsp->ws_escape[0] = wordsplit_c_escape_tab;
|
||||
wsp->ws_escape[1] = wordsplit_c_escape_tab;
|
||||
wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
|
||||
| WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
|
||||
}
|
||||
else
|
||||
{
|
||||
wsp->ws_escape[0] = "";
|
||||
wsp->ws_escape[1] = "\\\\\"\"";
|
||||
wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
|
||||
}
|
||||
}
|
||||
|
||||
wsp->ws_endp = 0;
|
||||
|
||||
|
@ -551,13 +577,14 @@ coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
|
||||
char *dst, const char *src,
|
||||
size_t n);
|
||||
|
||||
static int
|
||||
wsnode_quoteremoval (struct wordsplit *wsp)
|
||||
{
|
||||
struct wordsplit_node *p;
|
||||
void (*uqfn) (char *, const char *, size_t) =
|
||||
(wsp->ws_flags & WRDSF_CESCAPES) ?
|
||||
wordsplit_c_unquote_copy : wordsplit_sh_unquote_copy;
|
||||
|
||||
for (p = wsp->ws_head; p; p = p->next)
|
||||
{
|
||||
|
@ -585,11 +612,8 @@ wsnode_quoteremoval (struct wordsplit *wsp)
|
|||
p->flags |= _WSNF_WORD;
|
||||
}
|
||||
|
||||
if (wsp->ws_flags & WRDSF_ESCAPE)
|
||||
wordsplit_general_unquote_copy (p->v.word, str, slen,
|
||||
wsp->ws_escape);
|
||||
else
|
||||
uqfn (p->v.word, str, slen);
|
||||
wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
|
||||
p->v.word, str, slen);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
@ -1825,35 +1849,6 @@ scan_word (struct wordsplit *wsp, size_t start)
|
|||
return _WRDS_OK;
|
||||
}
|
||||
|
||||
static char quote_transtab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
|
||||
|
||||
int
|
||||
wordsplit_c_unquote_char (int c)
|
||||
{
|
||||
char *p;
|
||||
|
||||
for (p = quote_transtab; *p; p += 2)
|
||||
{
|
||||
if (*p == c)
|
||||
return p[1];
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
int
|
||||
wordsplit_c_quote_char (int c)
|
||||
{
|
||||
char *p;
|
||||
|
||||
for (p = quote_transtab + sizeof (quote_transtab) - 2;
|
||||
p > quote_transtab; p -= 2)
|
||||
{
|
||||
if (*p == c)
|
||||
return p[-1];
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#define to_num(c) \
|
||||
(ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
|
||||
|
||||
|
@ -1894,7 +1889,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
|
|||
len += 3;
|
||||
else
|
||||
{
|
||||
if (wordsplit_c_quote_char (*str) != -1)
|
||||
if (wordsplit_c_quote_char (*str))
|
||||
len += 2;
|
||||
else
|
||||
len += 4;
|
||||
|
@ -1903,47 +1898,56 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
|
|||
return len;
|
||||
}
|
||||
|
||||
void
|
||||
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
|
||||
const char *escapable)
|
||||
int
|
||||
wsplt_unquote_char (const char *transtab, int c)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n;)
|
||||
while (*transtab && transtab[1])
|
||||
{
|
||||
if (src[i] == '\\' && i < n && strchr (escapable, src[i + 1]))
|
||||
i++;
|
||||
*dst++ = src[i++];
|
||||
if (*transtab++ == c)
|
||||
return *transtab;
|
||||
++transtab;
|
||||
}
|
||||
*dst = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
wsplt_quote_char (const char *transtab, int c)
|
||||
{
|
||||
for (; *transtab && transtab[1]; transtab += 2)
|
||||
{
|
||||
if (transtab[1] == c)
|
||||
return *transtab;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
wordsplit_c_unquote_char (int c)
|
||||
{
|
||||
return wsplt_unquote_char (wordsplit_c_escape_tab, c);
|
||||
}
|
||||
|
||||
int
|
||||
wordsplit_c_quote_char (int c)
|
||||
{
|
||||
return wsplt_quote_char (wordsplit_c_escape_tab, c);
|
||||
}
|
||||
|
||||
void
|
||||
wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n;)
|
||||
{
|
||||
if (src[i] == '\\')
|
||||
i++;
|
||||
*dst++ = src[i++];
|
||||
}
|
||||
*dst = 0;
|
||||
}
|
||||
|
||||
void
|
||||
wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
|
||||
wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
|
||||
char *dst, const char *src, size_t n)
|
||||
{
|
||||
int i = 0;
|
||||
int c;
|
||||
|
||||
inquote = !!inquote;
|
||||
while (i < n)
|
||||
{
|
||||
if (src[i] == '\\')
|
||||
{
|
||||
++i;
|
||||
if (src[i] == 'x' || src[i] == 'X')
|
||||
if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
|
||||
&& (src[i] == 'x' || src[i] == 'X'))
|
||||
{
|
||||
if (n - i < 2)
|
||||
{
|
||||
|
@ -1966,7 +1970,8 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
|
|||
}
|
||||
}
|
||||
}
|
||||
else if ((unsigned char) src[i] < 128 && ISDIGIT (src[i]))
|
||||
else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
|
||||
&& (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
|
||||
{
|
||||
if (n - i < 1)
|
||||
{
|
||||
|
@ -1988,8 +1993,17 @@ wordsplit_c_unquote_copy (char *dst, const char *src, size_t n)
|
|||
}
|
||||
}
|
||||
}
|
||||
else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
|
||||
{
|
||||
*dst++ = c;
|
||||
++i;
|
||||
}
|
||||
else
|
||||
*dst++ = wordsplit_c_unquote_char (src[i++]);
|
||||
{
|
||||
if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
|
||||
*dst++ = '\\';
|
||||
*dst++ = src[i++];
|
||||
}
|
||||
}
|
||||
else
|
||||
*dst++ = src[i++];
|
||||
|
@ -2023,7 +2037,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
|
|||
{
|
||||
int c = wordsplit_c_quote_char (*src);
|
||||
*dst++ = '\\';
|
||||
if (c != -1)
|
||||
if (c)
|
||||
*dst++ = c;
|
||||
else
|
||||
{
|
||||
|
|
|
@ -43,7 +43,7 @@ struct wordsplit
|
|||
Additional options. */
|
||||
const char *ws_delim; /* [Input] (WRDSF_DELIM) Word delimiters. */
|
||||
const char *ws_comment; /* [Input] (WRDSF_COMMENT) Comment characters. */
|
||||
const char *ws_escape; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
|
||||
const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
|
||||
with backslash. */
|
||||
void (*ws_alloc_die) (wordsplit_t *wsp);
|
||||
/* [Input] (WRDSF_ALLOC_DIE) Function called when
|
||||
|
@ -184,13 +184,35 @@ struct wordsplit
|
|||
WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
|
||||
|
||||
/* Remove the word that produces empty string after path expansion */
|
||||
#define WRDSO_NULLGLOB 0x01
|
||||
#define WRDSO_NULLGLOB 0x00000001
|
||||
/* Print error message if path expansion produces empty string */
|
||||
#define WRDSO_FAILGLOB 0x02
|
||||
#define WRDSO_FAILGLOB 0x00000002
|
||||
/* Allow a leading period to be matched by metacharacters. */
|
||||
#define WRDSO_DOTGLOB 0x04
|
||||
#define WRDSO_DOTGLOB 0x00000004
|
||||
/* ws_command needs argv parameter */
|
||||
#define WRDSO_ARGV 0x08
|
||||
#define WRDSO_ARGV 0x00000008
|
||||
/* Keep backslash in unrecognized escape sequences in words */
|
||||
#define WRDSO_BSKEEP_WORD 0x00000010
|
||||
/* Handle octal escapes in words */
|
||||
#define WRDSO_OESC_WORD 0x00000020
|
||||
/* Handle hex escapes in words */
|
||||
#define WRDSO_XESC_WORD 0x00000040
|
||||
|
||||
/* Keep backslash in unrecognized escape sequences in quoted strings */
|
||||
#define WRDSO_BSKEEP_QUOTE 0x00000100
|
||||
/* Handle octal escapes in quoted strings */
|
||||
#define WRDSO_OESC_QUOTE 0x00000200
|
||||
/* Handle hex escapes in quoted strings */
|
||||
#define WRDSO_XESC_QUOTE 0x00000400
|
||||
|
||||
#define WRDSO_BSKEEP WRDSO_BSKEEP_WORD
|
||||
#define WRDSO_OESC WRDSO_OESC_WORD
|
||||
#define WRDSO_XESC WRDSO_XESC_WORD
|
||||
|
||||
/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1) */
|
||||
#define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
|
||||
/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1) */
|
||||
#define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
|
||||
|
||||
#define WRDSE_OK 0
|
||||
#define WRDSE_EOF WRDSE_OK
|
||||
|
@ -215,10 +237,6 @@ int wordsplit_c_unquote_char (int c);
|
|||
int wordsplit_c_quote_char (int c);
|
||||
size_t wordsplit_c_quoted_length (const char *str, int quote_hex,
|
||||
int *quote);
|
||||
void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
|
||||
const char *escapable);
|
||||
void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
|
||||
void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
|
||||
void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
|
||||
|
||||
void wordsplit_perror (wordsplit_t *ws);
|
||||
|
|
|
@ -362,8 +362,10 @@ TESTWSP([suppress ws trimming within quotes],
|
|||
4: "formatfield=In message %{text}, "
|
||||
])
|
||||
|
||||
# FIXME: numbering
|
||||
TESTWSP([unescape],
|
||||
[wsp-unescape wsp33],[-default novar nocmd quote escape '\"'],
|
||||
[wsp-unescape wsp-unescape-simple wsp33],
|
||||
[-default novar nocmd quote escape ':+:\\""'],
|
||||
[\Seen "quote \"" "bs \\"],
|
||||
[NF: 3
|
||||
0: \\Seen
|
||||
|
@ -371,6 +373,21 @@ TESTWSP([unescape],
|
|||
2: "bs \\"
|
||||
])
|
||||
|
||||
TESTWSP([unescape: word/quote],
|
||||
[wsp-unescape wsp-unescape-word wsp33],
|
||||
[-default novar nocmd quote escape-word '\\""' escape-quote ':+0x:\\""'],
|
||||
[\Seen "quote \"" "bs \\" "3\x31 \101" 3\x31 \101],
|
||||
[NF: 6
|
||||
0: Seen
|
||||
1: "quote \""
|
||||
2: "bs \\"
|
||||
3: "31 A"
|
||||
4: 3x31
|
||||
5: 101
|
||||
])
|
||||
|
||||
# END FIXME
|
||||
|
||||
TESTWSP([dquote],[wsp34],[-default novar nocmd dquote],
|
||||
[a "quoted example" isn't it],
|
||||
[NF: 4
|
||||
|
|
61
tests/wsp.c
61
tests/wsp.c
|
@ -106,6 +106,8 @@ help ()
|
|||
printf (" -%s\n", string_keytab[i].name);
|
||||
printf (" %s ARG\n", string_keytab[i].name);
|
||||
}
|
||||
printf (" escape-word ARG\n");
|
||||
printf (" escape-quote ARG\n");
|
||||
putchar ('\n');
|
||||
for (i = 0; opt_keytab[i].name; i++)
|
||||
{
|
||||
|
@ -281,6 +283,41 @@ struct kwd env_keytab[] = {
|
|||
{ NULL }
|
||||
};
|
||||
|
||||
static void
|
||||
set_escape_string (wordsplit_t *ws, int *wsflags, int q, const char *str)
|
||||
{
|
||||
if (*str == ':')
|
||||
{
|
||||
while (*++str != ':')
|
||||
{
|
||||
int f;
|
||||
switch (*str)
|
||||
{
|
||||
case '+':
|
||||
f = WRDSO_BSKEEP;
|
||||
break;
|
||||
|
||||
case '0':
|
||||
f = WRDSO_OESC;
|
||||
break;
|
||||
|
||||
case 'x':
|
||||
f = WRDSO_XESC;
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf (stderr, "%s: invalid escape flag near %s\n",
|
||||
progname, str);
|
||||
abort ();
|
||||
}
|
||||
WRDSO_ESC_SET (ws, q, f);
|
||||
}
|
||||
*wsflags |= WRDSF_OPTIONS;
|
||||
++str;
|
||||
}
|
||||
ws->ws_escape[q] = str;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
|
@ -397,7 +434,8 @@ main (int argc, char **argv)
|
|||
break;
|
||||
|
||||
case WRDSF_ESCAPE:
|
||||
ws.ws_escape = argv[i];
|
||||
set_escape_string (&ws, &wsflags, 0, argv[i]);
|
||||
set_escape_string (&ws, &wsflags, 1, argv[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -406,6 +444,27 @@ main (int argc, char **argv)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (strcmp (opt, "escape-word") == 0
|
||||
|| strcmp (opt, "escape-quote") == 0)
|
||||
{
|
||||
int q = opt[7] == 'q';
|
||||
|
||||
i++;
|
||||
if (i == argc)
|
||||
{
|
||||
fprintf (stderr, "%s: missing argument for %s\n",
|
||||
progname, opt);
|
||||
exit (1);
|
||||
}
|
||||
if (!(wsflags & WRDSF_ESCAPE))
|
||||
{
|
||||
wsflags |= WRDSF_ESCAPE;
|
||||
ws.ws_escape[!q] = NULL;
|
||||
}
|
||||
set_escape_string (&ws, &wsflags, q, argv[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (strcmp (opt, "dooffs") == 0)
|
||||
{
|
||||
if (negate)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue