mirror of
git://git.gnu.org.ua/wordsplit.git
synced 2025-04-25 08:09:53 +03:00

* README: Update. * wordsplit.3: Document changes. * wordsplit.at: Test backward compatibility quirk. * wordsplit.c: Make sure NULL and DELIM nodes are protected from expansions. (wordsplit_finish): Ensure the output array produced with WRDSF_RETURN_DELIMS is consistent with that produced without this flag. Provide new option, WRDSO_RETDELNOTEMPTY, to request old buggy behavior. * wordsplit.h (WRDSO_RETDELNOTEMPTY): New option. * wsp.c: New tests.
1311 lines
40 KiB
Groff
1311 lines
40 KiB
Groff
.\" This file is part of wordsplit -*- nroff -*-
|
|
.\" Copyright (C) 2009-2025 Sergey Poznyakoff
|
|
.\"
|
|
.\" Wordsplit is free software; you can redistribute it and/or modify
|
|
.\" it under the terms of the GNU General Public License as published by
|
|
.\" the Free Software Foundation; either version 3, or (at your option)
|
|
.\" any later version.
|
|
.\"
|
|
.\" Wordsplit is distributed in the hope that it will be useful,
|
|
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
.\" GNU General Public License for more details.
|
|
.\"
|
|
.\" You should have received a copy of the GNU General Public License
|
|
.\" along with wordsplit. If not, see <http://www.gnu.org/licenses/>.
|
|
.\"
|
|
.TH WORDSPLIT 3 "March 15, 2025" "WORDSPLIT" "Wordsplit User Reference"
|
|
.SH NAME
|
|
wordsplit \- split string into words
|
|
.SH SYNOPSIS
|
|
.B #include <wordsplit.h>
|
|
.sp
|
|
\fBint wordsplit (const char *\fIs\fB,\
|
|
wordsplit_t *\fIws\fB, int \fIflags\fB);\fR
|
|
.sp
|
|
\fBint wordsplit_len (const char *\fIs\fB,\
|
|
\fBsize_t \fIlen\fR,\
|
|
\fBwordsplit_t *\fIp\fB,\
|
|
int \fIflags\fB);
|
|
.sp
|
|
\fBvoid wordsplit_free (wordsplit_t *\fIp\fB);\fR
|
|
.sp
|
|
\fBvoid wordsplit_free_words (wordsplit_t *\fIws\fB);\fR
|
|
.sp
|
|
\fBvoid wordsplit_getwords (wordsplit_t *\fIws\fB,\
|
|
int *\fIwordc\fB, char ***\fIwordv\fB);
|
|
.sp
|
|
\fBvoid wordsplit_perror (wordsplit_t *\fIws\fB);\fR
|
|
.sp
|
|
\fBconst char *wordsplit_strerror (wordsplit_t *\fIws\fB);\fR
|
|
.sp
|
|
\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR
|
|
.SH DESCRIPTION
|
|
The function \fBwordsplit\fR splits the string \fIs\fR into words
|
|
using a set of rules governed by \fIflags\fR. Depending on
|
|
\fIflags\fR, the function performs the following operations:
|
|
whitespace trimming, tilde expansion, variable expansion, quote
|
|
removal, command substitution, and path expansion. On success,
|
|
\fBwordsplit\fR returns 0 and stores the words found in the member
|
|
\fBws_wordv\fR and the number of words in the member \fBws_wordc\fR.
|
|
On error, a non-zero error code is returned.
|
|
.PP
|
|
The function \fBwordsplit_len\fR acts similarly, except that it
|
|
accesses only the first \fIlen\fR bytes of the string \fIs\fR, which is
|
|
not required to be null-terminated.
|
|
.PP
|
|
When no longer needed, the resources allocated by a call to one of
|
|
these functions must be freed using
|
|
.BR wordsplit_free .
|
|
.PP
|
|
The function
|
|
.B wordsplit_free_words
|
|
frees only the memory allocated for elements of
|
|
.I ws_wordv
|
|
after which it resets
|
|
\fIws_wordv\fR to
|
|
.B NULL
|
|
and
|
|
.I ws_wordc
|
|
to zero.
|
|
.PP
|
|
The usual calling sequence is:
|
|
.PP
|
|
.EX
|
|
wordsplit_t ws;
|
|
int rc;
|
|
|
|
if (wordsplit(s, &ws, WRDSF_DEFFLAGS) == 0) {
|
|
for (i = 0; i < ws.ws_wordc; i++) {
|
|
/* do something with ws.ws_wordv[i] */
|
|
}
|
|
}
|
|
wordsplit_free(&ws);
|
|
.EE
|
|
.PP
|
|
Notice that \fBwordsplit_free\fR must be called after each invocation
|
|
of \fBwordsplit\fR or \fBwordsplit_len\fR, even if it resulted in
|
|
error.
|
|
.PP
|
|
The function
|
|
.B wordsplit_getwords
|
|
returns in \fIwordv\fR an array of words, and in \fIwordc\fR the number
|
|
of elements in \fIwordv\fR. The array can be used after calling
|
|
.BR wordsplit_free .
|
|
The caller becomes responsible for freeing the memory allocated for
|
|
each element of the array and the array pointer itself.
|
|
.PP
|
|
The function
|
|
.B wordsplit_perror
|
|
prints error message from the last invocation of \fBwordsplit\fR. It
|
|
uses the function pointed to by the
|
|
.I ws_error
|
|
member. By default, it outputs the message on the standard error.
|
|
.PP
|
|
For more sophisticated error reporting, the function
|
|
.B wordsplit_strerror
|
|
can be used. It returns a pointer to the string describing the error.
|
|
The caller should treat this pointer as a constant string. It should
|
|
not try to alter or deallocate it.
|
|
.PP
|
|
The function
|
|
.B wordsplit_clearerr
|
|
clears the error condition associated with \fIws\fR.
|
|
.SH INCREMENTAL MODE
|
|
In incremental mode \fBwordsplit\fR parses one word per invocation.
|
|
It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when
|
|
entire input string has been processed.
|
|
.PP
|
|
This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in
|
|
the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must
|
|
have \fBNULL\fR as first argument. Each successful
|
|
call will return exactly one word in \fBws.ws_wordv[0]\fR.
|
|
.PP
|
|
An example usage:
|
|
.PP
|
|
.EX
|
|
wordsplit_t ws;
|
|
int rc;
|
|
int flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL;
|
|
|
|
for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK;
|
|
rc = wordsplit(NULL, &ws, flags)) {
|
|
process(ws.ws_wordv[0]);
|
|
}
|
|
|
|
if (rc != WRDSE_NOINPUT)
|
|
wordsplit_perror(&ws);
|
|
|
|
wordsplit_free(&ws);
|
|
.EE
|
|
.SH OPTIONS
|
|
The number of flags is limited to 32 (the width of \fBuint32_t\fR data
|
|
type). By the time of this writing each bit is already occupied by a
|
|
corresponding flag. However, the number of features \fBwordsplit\fR
|
|
provides requires still more. Additional features can be requested by
|
|
setting a corresponding \fIoption bit\fR in the \fBws_options\fR field
|
|
of the \fBstruct wordsplit\fR argument. To inform wordsplit functions
|
|
that this field is initialized the \fBWRDSF_OPTIONS\fR flag must be set.
|
|
.PP
|
|
Option symbolic names begin with \fBWRDSO_\fR. They are discussed in
|
|
detail in the subsequent chapters.
|
|
.SH EXPANSION
|
|
Expansion is performed on the input after it has been split into
|
|
words. The kinds of expansion to be performed are controlled by the
|
|
appropriate bits set in the \fIflags\fR argument. Whatever expansion
|
|
kinds are enabled, they are always run in the order described in this
|
|
section.
|
|
.SS Whitespace trimming
|
|
Whitespace trimming removes any leading and trailing whitespace from
|
|
the initial word array. It is enabled by the
|
|
.B WRDSF_WS
|
|
flag. Whitespace trimming is enabled automatically if the word
|
|
delimiters (\fIws_delim\fR member) contain whitespace characters
|
|
(\fB\(dq \\t\\n\(dq\fR), which is the default.
|
|
.SS Variable expansion
|
|
Variable expansion replaces each occurrence of
|
|
.BI $ NAME
|
|
or
|
|
.BI ${ NAME }
|
|
with the value of the variable \fINAME\fR. It is enabled by default
|
|
and can be disabled by setting the \fBWRDSF_NOVAR\fR flag. The caller
|
|
is responsible for supplying the table of available variables. Two
|
|
mechanisms are provided: environment array and a callback function.
|
|
.PP
|
|
Environment array is a \fBNULL\fR-terminated array of variables,
|
|
stored in the \fIws_env\fR member. The \fBWRDSF_ENV\fR flag must be
|
|
set in order to instruct \fBwordsplit\fR to use this array.
|
|
.PP
|
|
By default, elements of the \fIws_env\fR array have the form
|
|
.IR NAME = VALUE .
|
|
An alternative format is enabled by the
|
|
.B WRDSF_ENV_KV
|
|
flag. When it is set, each variable is described by two consecutive
|
|
elements in the array:
|
|
.IR ws_env [ n ]
|
|
containing the variable name, and
|
|
.IR ws_env [ "n+1" ]
|
|
containing its value. If the latter is \fBNULL\fR, the corresponding
|
|
variable is undefined.
|
|
.PP
|
|
More sophisticated variable tables can be implemented using
|
|
callback function. The \fIws_getvar\fR member should be set to point
|
|
to that function and \fBWRDSF_GETVAR\fR flag must be set. The
|
|
function itself shall be defined as
|
|
.PP
|
|
.EX
|
|
int getvar (char **ret, const char *var, size_t len, void *clos);
|
|
.EE
|
|
.PP
|
|
The function shall look up the variable identified by the first
|
|
\fIlen\fR bytes of the string \fIvar\fR. If the variable is found,
|
|
the function shall store a copy of its value (allocated using
|
|
\fBmalloc\fR(3)) in the memory location pointed to by \fBret\fR, and
|
|
return \fBWRDSE_OK\fR. If the variable is not found, the function shall
|
|
return \fBWRDSE_UNDEF\fR. Otherwise, a non-zero error code shall be
|
|
returned.
|
|
.PP
|
|
If \fIws_getvar\fR returns
|
|
.BR WRDSE_USERERR ,
|
|
it must store the pointer to the error description string in
|
|
.BR *ret .
|
|
In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
|
|
data returned in \fBret\fR must be allocated using
|
|
.BR malloc (3).
|
|
.PP
|
|
If both
|
|
.I ws_env
|
|
and
|
|
.I ws_getvar
|
|
are used, the variable is first looked up in
|
|
.IR ws_env .
|
|
If it is not found there, the
|
|
.I ws_getvar
|
|
callback is invoked.
|
|
This order is reversed if the \fBWRDSO_GETVARPREF\fR option is set.
|
|
.PP
|
|
During variable expansion, the forms below cause
|
|
.B wordsplit
|
|
to test for a variable that is unset or null. Omitting the
|
|
colon results in a test only for a variable that is unset.
|
|
.TP
|
|
.BI ${ variable :- word }
|
|
.BR "Use Default Values" .
|
|
If \fIvariable\fR is unset or null, the expansion of \fIword\fR is substituted.
|
|
Otherwise, the value of \fIvariable\fR is substituted.
|
|
.TP
|
|
.BI ${ variable := word }
|
|
.BR "Assign Default Values" .
|
|
If \fIvariable\fR is unset or null, the expansion of \fIword\fR is
|
|
assigned to \fIvariable\fR. The value of \fIvariable\fR is then substituted.
|
|
.TP
|
|
.BI ${ variable :? word }
|
|
.BR "Display Error if Null or Unset" .
|
|
If \fIvariable\fR is null or unset, the expansion of \fIword\fR (or a
|
|
message to that effect if word is not present) is output using
|
|
.IR ws_error .
|
|
Otherwise, the value of \fIvariable\fR is substituted.
|
|
.TP
|
|
.BI ${ variable :+ word }
|
|
.BR "Use Alternate Value" .
|
|
If \fIvariable\fR is null or unset, nothing is substituted, otherwise the
|
|
expansion of \fIword\fR is substituted.
|
|
.PP
|
|
Unless the above forms are used, a reference to an undefined variable
|
|
expands to empty string. Three flags affect this behavior. If the
|
|
\fBWRDSF_UNDEF\fR flag is set, expanding undefined variable triggers
|
|
a \fBWRDSE_UNDEF\fR error. If the \fBWRDSF_WARNUNDEF\fR flag is set,
|
|
a non-fatal warning is emitted for each undefined variable. Finally,
|
|
if the \fBWRDSF_KEEPUNDEF\fR flag is set, references to undefined
|
|
variables are left unexpanded.
|
|
.PP
|
|
If two or three of these flags are set simultaneously, the behavior is
|
|
undefined.
|
|
.SS Positional argument expansion
|
|
\fIPositional arguments\fR are special parameters that can be
|
|
referenced in the input string by their ordinal number. The numbering
|
|
begins at \fB0\fR. The syntax for referencing positional arguments is
|
|
the same as for the variables, except that argument index is used
|
|
instead of the variable name. If the index is between 0 and 9, the
|
|
\fB$\fIN\fR form is acceptable. Otherwise, the index must be enclosed
|
|
in curly braces: \fB${\fIN\fB}\fR.
|
|
.PP
|
|
During argument expansion, references to positional arguments are
|
|
replaced with the corresponding values.
|
|
.PP
|
|
Argument expansion is requested by the \fBWRDSO_PARAMV\fR option bit.
|
|
The NULL-terminated array of variables shall be supplied in the
|
|
.I ws_paramv
|
|
member. The
|
|
.I ws_paramc
|
|
member shall be initialized to the number of elements in
|
|
.IR ws_paramv .
|
|
.PP
|
|
Setting the \fBWRDSO_PARAM_NEGIDX\fR option together with
|
|
\fBWRDSO_PARAMV\fR enables negative positional argument references.
|
|
A negative reference has the form \fB${-\fIN\fB}\fR. It is expanded
|
|
to the value of the argument with index \fB\fIws_paramc\fR \- \fIN\fR.
|
|
.SS Quote removal
|
|
During quote removal, single or double quotes surrounding a sequence
|
|
of characters are removed and the sequence itself is treated as a
|
|
single word. Characters within single quotes are treated verbatim.
|
|
Characters within double quotes undergo variable expansion and
|
|
backslash interpretation (see below).
|
|
.PP
|
|
Recognition of single quoted strings is enabled by the
|
|
\fBWRDSF_SQUOTE\fR flag. Recognition of double quotes is enabled by
|
|
the \fBWRDSF_DQUOTE\fR flag. The macro \fBWRDSF_QUOTE\fR enables both.
|
|
.SS Backslash interpretation
|
|
Backslash interpretation translates unquoted
|
|
.I escape sequences
|
|
into corresponding characters. An escape sequence is a backslash followed
|
|
by one or more characters. By default, that is if no flags are
|
|
supplied, no escape sequences are defined, and each sequence
|
|
\fB\\\fIC\fR is reproduced verbatim.
|
|
.PP
|
|
There are several ways to enable backslash interpretation and to
|
|
define escape sequences. The simplest one is to use the
|
|
\fBWRDSF_CESCAPES\fR flag. This flag defines the C-like escape
|
|
sequences:
|
|
.PP
|
|
.nf
|
|
.ta 8n 18n 42n
|
|
.ul
|
|
Sequence Expansion ASCII
|
|
\fB\\\\\fR \fB\\\fR 134
|
|
\fB\\\(dq\fR \fB\(dq\fR 042
|
|
\fB\\a\fR audible bell 007
|
|
\fB\\b\fR backspace 010
|
|
\fB\\f\fR form-feed 014
|
|
\fB\\n\fR new line 012
|
|
\fB\\r\fR carriage return 015
|
|
\fB\\t\fR horizontal tabulation 011
|
|
\fB\\v\fR vertical tabulation 013
|
|
.fi
|
|
.sp
|
|
The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands
|
|
for a two-digit hex number is replaced with ASCII character \fINN\fR.
|
|
The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit
|
|
octal number is replaced with ASCII character whose code is \fINNN\fR.
|
|
.PP
|
|
Additionally, outside of quoted strings (if these are enabled by the
|
|
use of \fBWRDSF_DQUOTE\fR flag) backslash character can be used to
|
|
escape horizontal whitespace: horizontal space (ASCII 32) and
|
|
tab (ASCII 9) characters.
|
|
.PP
|
|
The \fBWRDSF_CESCAPES\fR bit is included in the default flag
|
|
set \fBWRDSF_DEFFLAGS\fR.
|
|
.PP
|
|
The \fBWRDSF_ESCAPE\fR flag provides a more elaborate way of defining
|
|
escape sequences. If it is set, the \fBws_escape\fR member must be
|
|
initialized. This member provides escape tables for unquoted words
|
|
(\fBws_escape[WRDSX_WORD]\fR) and quoted strings
|
|
(\fBws_escape[WRDSX_QUOTE]\fR). Each table is a string consisting of
|
|
an even number of characters. In each pair of characters, the first
|
|
one is a character that can appear after backslash, and the following
|
|
one is its translation. For example, the table of C escapes is
|
|
represented as follows:
|
|
.TP
|
|
\fB\(dq\\\\\\\\\\"\\"a\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR
|
|
.PP
|
|
It is valid to initialize \fBws_escape\fR elements to NULL. In this
|
|
case, no backslash translation occurs.
|
|
.PP
|
|
For convenience, the global variable
|
|
.B wordsplit_escape
|
|
defines several most often used escape translation tables:
|
|
.PP
|
|
.EX
|
|
extern char const *wordsplit_escape[];
|
|
.EE
|
|
.PP
|
|
It is indexed by the following constants:
|
|
.TP
|
|
.B WS_ESC_C
|
|
C-style escapes, the definition of which is shown above. This is the
|
|
translation table that is used within quoted strings when
|
|
.B WRDSF_CESCAPES
|
|
is in effect.
|
|
.TP
|
|
.B WS_ESC_C_WS
|
|
The \fBWS_ESC_C\fR table augmented by two entries: for horizontal tab
|
|
character and whitespace. This is the table that is used for unquoted
|
|
words when
|
|
.B WRDSF_CESCAPES
|
|
is in effect.
|
|
.TP
|
|
.B WS_ESC_DQ
|
|
Backslash character escapes double-quote and itself. Useful for
|
|
handling doubly-quoted strings in various Internet protocols.
|
|
.TP
|
|
.B WS_ESC_DQ_WS
|
|
Escape double-quote, backslash, horizontal tab and whitespace characters.
|
|
.PP
|
|
Interpretation of octal and hex escapes is controlled by the following
|
|
bits in \fBws_options\fR:
|
|
.TP
|
|
.B WRDSO_BSKEEP_WORD
|
|
When an unrecognized escape sequence is encountered in a word,
|
|
preserve it on output. If that bit is not set, the backslash is
|
|
removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_WORD
|
|
Handle octal escapes in words.
|
|
.TP
|
|
.B WRDSO_XESC_WORD
|
|
Handle hex escapes in words.
|
|
.TP
|
|
.B WRDSO_BSKEEP_QUOTE
|
|
When an unrecognized escape sequence is encountered in a doubly-quoted
|
|
string, preserve it on output. If that bit is not set, the backslash is
|
|
removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_QUOTE
|
|
Handle octal escapes in doubly-quoted strings.
|
|
.TP
|
|
.B WRDSO_XESC_QUOTE
|
|
Handle hex escapes in doubly-quoted strings.
|
|
.SS Command substitution
|
|
During \fIcommand substitution\fR, each word is scanned for commands.
|
|
Each command found is executed and replaced by the output it creates.
|
|
.PP
|
|
The syntax is:
|
|
.PP
|
|
.RS +4
|
|
.BI $( command )
|
|
.RE
|
|
.PP
|
|
Command substitutions may be nested.
|
|
.PP
|
|
Unless the substitution appears within double quotes, word splitting and
|
|
pathname expansion are performed on its result.
|
|
.PP
|
|
To enable command substitution, the caller must initialize the
|
|
.I ws_command
|
|
member with the address of the substitution function and make sure the
|
|
.B WRDSF_NOCMD
|
|
flag is not set.
|
|
.PP
|
|
The substitution function should be defined as follows:
|
|
.PP
|
|
.RS +4
|
|
\fBint \fIcommand\fB\
|
|
(char **\fIret\fB,\
|
|
const char *\fIcmd\fB,\
|
|
size_t \fIlen,\fB\
|
|
char **\fIargv\fB,\
|
|
void *\fIclos\fB);\fR
|
|
.RE
|
|
.PP
|
|
On input, the first \fIlen\fR bytes of \fIcmd\fR contain the command
|
|
invocation as it appeared between
|
|
.BR $( " and " ),
|
|
with all expansions performed.
|
|
.PP
|
|
The \fIargv\fR parameter contains the command
|
|
line split into words using the same settings as the input \fIws\fR structure.
|
|
.PP
|
|
The \fIclos\fR parameter supplies user-specific data, passed in the
|
|
\fIws_closure\fR member.
|
|
.PP
|
|
On success, the function stores a pointer to the
|
|
output string in the memory location pointed to by \fIret\fR and
|
|
returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the
|
|
error codes described in the section
|
|
.BR "ERROR CODES" .
|
|
If
|
|
.B WRDSE_USERERR
|
|
is returned, a pointer to the error description string must be stored in
|
|
.BR *ret .
|
|
.PP
|
|
When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the
|
|
data stored in \fB*ret\fR must be allocated using
|
|
.BR malloc (3).
|
|
.SS Tilde and pathname expansion
|
|
Both expansions are performed if the
|
|
.B WRDSF_PATHEXPAND
|
|
flag is set.
|
|
.PP
|
|
.I Tilde expansion
|
|
affects any word that begins with an unquoted tilde
|
|
character (\fB~\fR). If the tilde is followed immediately by a slash,
|
|
it is replaced with the home directory of the current user (as
|
|
determined by his \fBpasswd\fR entry). A tilde alone is handled the
|
|
same way. Otherwise, the characters between the tilde and first slash
|
|
character (or end of string, if it doesn't contain any) are treated as
|
|
a login name and are replaced (along with the tilde itself) with the
|
|
home directory of that user. If there is no user with such login
|
|
name, the word is left unchanged.
|
|
.PP
|
|
During
|
|
.I pathname expansion
|
|
each unquoted word is scanned for characters
|
|
.BR * ", " ? ", and " [ .
|
|
If any of these appears, the word is considered a \fIpattern\fR (in
|
|
the sense of
|
|
.BR glob (3))
|
|
and is replaced with an alphabetically sorted list of file names matching the
|
|
pattern.
|
|
.PP
|
|
If no matches are found for a word
|
|
and the \fIws_options\fR member has the
|
|
.B WRDSO_NULLGLOB
|
|
bit set, the word is removed.
|
|
.PP
|
|
If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output
|
|
for each such word using
|
|
.IR ws_error .
|
|
.PP
|
|
When matching a pattern, the dot at the start of a name or immediately
|
|
following a slash must be matched explicitly, unless
|
|
the \fBWRDSO_DOTGLOB\fR option is set.
|
|
.SH VARIABLE NAMES
|
|
By default a shell-like lexical structure of a variable name is
|
|
assumed. A valid variable name begins with an alphabetical
|
|
character or underscore and contains alphabetical characters, digits
|
|
and underscores.
|
|
.PP
|
|
The set of characters that constitute a variable name can be
|
|
augmented. To do so, initialize the \fBws_namechar\fR member to the
|
|
C string containing the characters to be added, set the
|
|
\fBWRDSO_NAMECHAR\fR bit in \fBws_options\fR and set the
|
|
\fBWRDSF_OPTIONS\fR bit in the \fIflags\fR argument.
|
|
.PP
|
|
For example, to allow for colons in variable names, do:
|
|
.PP
|
|
.EX
|
|
struct wordsplit ws;
|
|
ws.ws_namechar = ":";
|
|
ws.ws_options = WRDSO_NAMECHAR;
|
|
wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS);
|
|
.EE
|
|
.PP
|
|
Certain characters cannot be allowed to be a name constituent. These
|
|
are:
|
|
.BR $ ,
|
|
.BR { ,
|
|
.BR } ,
|
|
.BR * ,
|
|
.BR @ ,
|
|
.BR \- ,
|
|
.BR + ,
|
|
.BR ? ,
|
|
and
|
|
.BR = .
|
|
If any of these appears in \fBws_namechar\fR, the \fBwordsplit\fR (and
|
|
\fBwordsplit_len\fR) function will return the
|
|
.B WRDSE_USAGE
|
|
error.
|
|
.SH LIMITING THE NUMBER OF WORDS
|
|
The maximum number of words to be returned can be limited by setting
|
|
the \fBws_maxwords\fR member to the desired count, and setting the
|
|
\fBWRDSO_MAXWORDS\fR option, e.g.:
|
|
.sp
|
|
.EX
|
|
struct wordsplit ws;
|
|
ws.ws_maxwords = 3;
|
|
ws.ws_options = WRDSO_MAXWORDS;
|
|
wordsplit(str, &ws, WRDSF_DEFFLAGS|WRDSF_OPTIONS);
|
|
.EE
|
|
.PP
|
|
If the actual number of words in the expanded input is greater than
|
|
the supplied limit, the trailing part of the input will be returned in
|
|
the last word. For example, if the input to the above fragment were
|
|
\fBNow is the time for all good men\fR, then the returned words would be:
|
|
.sp
|
|
.EX
|
|
"Now"
|
|
"is"
|
|
"the time for all good men"
|
|
.EE
|
|
.SH COMPATIBILITY QUIRKS
|
|
If
|
|
.B WRDSF_RETURN_DELIMS
|
|
is set and
|
|
.B WRDSF_SQUEEZE_DELIMS
|
|
is not,
|
|
.B wordsplit
|
|
returns an empty word between each pair of contiguous delimiters.
|
|
Consider, for example, the following fragment:
|
|
.PP
|
|
.EX
|
|
struct wordsplit ws;
|
|
ws.ws_delim = ":";
|
|
wordsplit(str, &ws, WRDSF_DELIM | WRDSF_RETURN_DELIMS);
|
|
.EE
|
|
.PP
|
|
If \fIstr\fR contained \fBroot:0:0::/root:/bin/sh\fR, the
|
|
resulting \fBws.ws_wordv\fR array would be:
|
|
.PP
|
|
.EX
|
|
{ "root", ":", "0", ":", "0", ":", "", ":", "/root", ":", "/bin/sh" }
|
|
.EE
|
|
.PP
|
|
Notice the empty word at index 6. Earlier versions of
|
|
.B wordsplit
|
|
(up to v1.1-7-g0e1a09c) behaved differently: several contiguous
|
|
delimiters were returned one after another, without empty words in
|
|
between, like that:
|
|
.PP
|
|
.EX
|
|
{ "root", ":", "0", ":", "0", ":", ":", "/root", ":", "/bin/sh" }
|
|
.EE
|
|
.PP
|
|
To request this behavior, use the
|
|
.B WRDSO_RETDELNOTEMPTY
|
|
option. Its use is not advised, except to
|
|
ensure backward compatibility with earlier wordsplit versions.
|
|
.SH WORDSPLIT_T STRUCTURE
|
|
The data type \fBwordsplit_t\fR has three members that contain
|
|
output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
|
|
and a number of members that the caller can initialize on input in
|
|
order to customize the function behavior. For each input member there
|
|
is a corresponding flag bit, which must be set in the \fIflags\fR argument
|
|
in order to instruct the \fBwordsplit\fR function to use the member.
|
|
.SS OUTPUT
|
|
.TP
|
|
.BI size_t " ws_wordc"
|
|
Number of words in \fIws_wordv\fR. Accessible upon successful return
|
|
from \fBwordsplit\fR.
|
|
.TP
|
|
.BI "char ** " ws_wordv
|
|
Array of resulting words. Accessible upon successful return
|
|
from \fBwordsplit\fR.
|
|
.PP
|
|
The caller should not attempt to free or reallocate \fIws_wordv\fR or
|
|
any elements thereof, nor to modify \fIws_wordc\fR.
|
|
.PP
|
|
To store away the words for use after freeing \fIws\fR with
|
|
.BR wordsplit_free ,
|
|
the caller should use
|
|
.BR wordsplit_getwords .
|
|
It is more effective than copying the contents of
|
|
.I ws_wordv
|
|
manually.
|
|
.TP
|
|
.BI "size_t " ws_wordi
|
|
Total number of words processed. This field is intended for use with
|
|
.B WRDSF_INCREMENTAL
|
|
flag. If that flag is not set, the following relation holds:
|
|
.BR "ws_wordi == ws_wordc - ws_offs" .
|
|
.TP
|
|
.BI "int " ws_errno
|
|
Error code, if the invocation of \fBwordsplit\fR or
|
|
\fBwordsplit_len\fR failed. This is the same value as returned from
|
|
the function in that case.
|
|
.TP
|
|
.BI "char *" ws_errctx
|
|
On error, context in which the error occurred. For
|
|
.BR WRDSE_UNDEF ,
|
|
it is the name of the undefined variable. For
|
|
.B WRDSE_GLOBERR
|
|
it is the pattern that caused the error.
|
|
.sp
|
|
The caller should treat this member as
|
|
.BR "const char *" .
|
|
.PP
|
|
The following members are used if the variable expansion was requested
|
|
and the input string contained an
|
|
.B Assign Default Values
|
|
form (\fB${\fIvariable\fB:=\fIword\fB}\fR).
|
|
.TP
|
|
.BI "char **" ws_envbuf
|
|
Modified environment. It follows the same arrangement as \fIws_env\fR
|
|
on input (see the \fBWRDSF_ENV_KV\fR flag). If \fIws_env\fR was NULL (or
|
|
\fBWRDSF_ENV\fR was not set), but the \fIws_getvar\fR callback was
|
|
used, the \fIws_envbuf\fR array will contain only the modified variables.
|
|
.TP
|
|
.BI "size_t " ws_envidx
|
|
Number of entries in
|
|
.IR ws_envbuf .
|
|
.PP
|
|
If positional parameters were used (see the \fBWRDSO_PARAMV\fR option)
|
|
and any of them were modified during processing, the following two
|
|
members supply the modified parameter array.
|
|
.TP
|
|
.BI "char ** " ws_parambuf
|
|
Array of positional parameters.
|
|
.TP
|
|
.BI "size_t " ws_paramidx
|
|
Number of positional parameters.
|
|
.SS INPUT
|
|
.TP
|
|
.BI "size_t " ws_offs
|
|
If the
|
|
.B WRDSF_DOOFFS
|
|
flag is set, this member specifies the number of initial elements in
|
|
.I ws_wordv
|
|
to fill with NULLs. These elements are not counted in the returned
|
|
.IR ws_wordc .
|
|
.TP
|
|
.BI "size_t " ws_maxwords
|
|
Maximum number of words to return. For this field to take effect, the
|
|
\fBWRDSO_MAXWORDS\fR option and \fBWRDSF_OPTIONS\fR flag must be set.
|
|
For a detailed discussion, see the chapter
|
|
.BR "LIMITING THE NUMBER OF WORDS" .
|
|
.TP
|
|
.BI "int " ws_flags
|
|
Contains flags passed to wordsplit on entry. Can be used as a
|
|
read-only member when using \fBwordsplit\fR in incremental mode or
|
|
in a loop with
|
|
.B WRDSF_REUSE
|
|
flag set.
|
|
.TP
|
|
.BI "int " ws_options
|
|
Additional options used when
|
|
.B WRDSF_OPTIONS
|
|
is set.
|
|
.TP
|
|
.BI "const char *" ws_delim
|
|
Word delimiters. If initialized on input, the
|
|
.B WRDSF_DELIM
|
|
flag must be set. Otherwise, it is initialized on entry to
|
|
.B wordsplit
|
|
with the string \fB\(dq \\t\\n\(dq\fR.
|
|
.TP
|
|
.BI "const char *" ws_comment
|
|
A zero-terminated string of characters that begin an inline comment.
|
|
If initialized on input, the
|
|
.B WRDSF_COMMENT
|
|
flag must be set. By default, its value is \fB\(dq#\(dq\fR.
|
|
.TP
|
|
.BI "const char *" ws_escape [2]
|
|
Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted
|
|
strings (\fBws_escape[1]\fR). These are used to translate escape
|
|
sequences (\fB\\\fIC\fR) into characters. Each table is a string
|
|
consisting of an even number of characters. In each pair of characters,
|
|
the first one is a character that can appear after backslash, and the
|
|
following one is its representation. For example, the string
|
|
\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into horizontal
|
|
tabulation character and \fB\\n\fR into newline.
|
|
.B WRDSF_ESCAPE
|
|
flag must be set if this member is initialized.
|
|
.TP
|
|
.BI "const char *" ws_namechar
|
|
Lists characters that are allowed in a variable name, in addition to
|
|
alphanumerics and underscore. The
|
|
.B WRDSO_NAMECHAR
|
|
bit must be set in
|
|
.B ws_options
|
|
for this to take effect.
|
|
.sp
|
|
See the chapter
|
|
.BR "VARIABLE NAMES" ,
|
|
for a detailed discussion.
|
|
.TP
|
|
.BI "void (*" ws_alloc_die ") (wordsplit_t *)"
|
|
This function is called when
|
|
.B wordsplit
|
|
is unable to allocate memory and the
|
|
.B WRDSF_ENOMEMABRT
|
|
flag was set. The default function prints a
|
|
message on standard error and aborts. This member can be used
|
|
to customize error handling. If initialized, the
|
|
.B WRDSF_ALLOC_DIE
|
|
flag must be set.
|
|
.TP
|
|
.BI "void (*" ws_error ") (const char *, ...)"
|
|
Pointer to function used for error reporting. The invocation
|
|
convention is the same as for
|
|
.BR printf (3).
|
|
The default function formats and prints the message on the standard
|
|
error.
|
|
|
|
If this member is initialized, the
|
|
.B WRDSF_ERROR
|
|
flag must be set.
|
|
.TP
|
|
.BI "void (*" ws_debug ") (const char *, ...)"
|
|
Pointer to function used for debugging output. By default it points
|
|
to the same function as
|
|
.BR ws_error .
|
|
If initialized, the
|
|
.B WRDSF_DEBUG
|
|
flag must be set.
|
|
.TP
|
|
.BI "const char **" ws_env
|
|
A \fBNULL\fR-terminated array of environment variables. It is used
|
|
during variable expansion. If set, the
|
|
.B WRDSF_ENV
|
|
flag must be set. Variable expansion is enabled only if either
|
|
.B WRDSF_ENV
|
|
or
|
|
.B WRDSF_GETVAR
|
|
(see below) is set, and
|
|
.B WRDSF_NOVAR
|
|
flag is not set.
|
|
|
|
Each element of
|
|
.I ws_env
|
|
must have the form \fINAME\fB=\fIVALUE\fR, where \fINAME\fR is
|
|
the name of the variable, and \fIVALUE\fR is its value.
|
|
Alternatively, if the \fBWRDSF_ENV_KV\fR flag is set, each variable is
|
|
described by two elements of
|
|
.IR ws_env :
|
|
one containing variable name, and the next one with its
|
|
value.
|
|
.TP
|
|
.BI "int (*" ws_getvar ") (char **ret, const char *var, size_t len, void *clos)"
|
|
Points to the function that will be used during variable expansion for
|
|
environment variable lookups.
|
|
This function is used if the variable expansion is enabled (i.e. the
|
|
.B WRDSF_NOVAR
|
|
flag is not set), and the \fBWRDSF_GETVAR\fR flag is set.
|
|
.sp
|
|
If both
|
|
.B WRDSF_ENV
|
|
and
|
|
.B WRDSF_GETVAR
|
|
are set, the variable is first looked up in the
|
|
.I ws_env
|
|
array and, if not found there,
|
|
.I ws_getvar
|
|
is called. If the \fBWRDSO_GETVARPREF\fR option is set, this order is
|
|
reversed.
|
|
.sp
|
|
The name of the variable is specified by the first \fIlen\fR bytes of
|
|
the string \fIvar\fR. The \fIclos\fR parameter supplies the
|
|
user-specific data (see below the description of \fIws_closure\fR
|
|
member) and the \fBret\fR parameter points to the memory location
|
|
where output data is to be stored. On success, the function must
|
|
store there a pointer to the string with the value of the variable and
|
|
return 0. On error, it must return one of the error codes described
|
|
in the section
|
|
.BR "ERROR CODES" .
|
|
If \fIws_getvar\fR returns
|
|
.BR WRDSE_USERERR ,
|
|
it must store the pointer to the error description string in
|
|
.BR *ret .
|
|
In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
|
|
data returned in \fBret\fR must be allocated using
|
|
.BR malloc (3).
|
|
.TP
|
|
.BI "void *" ws_closure
|
|
Additional user-specific data passed as the last argument to
|
|
.I ws_getvar
|
|
or
|
|
.I ws_command
|
|
(see below). If defined, the
|
|
.B WRDSF_CLOSURE
|
|
flag must be set.
|
|
.TP
|
|
\fBint (*\fIws_command\fB)\
|
|
(char **ret,\
|
|
const char *cmd,\
|
|
size_t len,\
|
|
char **argv,\
|
|
void *clos)\fR
|
|
Pointer to the function that performs command substitution. It treats
|
|
the first \fIlen\fR bytes of the string \fIcmd\fR as a command
|
|
(whatever it means for the caller) and attempts to execute it. On
|
|
success, a pointer to the string with the command output is stored
|
|
in the memory location pointed to by \fBret\fR and \fB0\fR is
|
|
returned. On error,
|
|
the function must return one of the error codes described in the section
|
|
.BR "ERROR CODES" .
|
|
If \fIws_command\fR returns
|
|
.BR WRDSE_USERERR ,
|
|
it must store the pointer to the error description string in
|
|
.BR *ret .
|
|
In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
|
|
data returned in \fBret\fR must be allocated using
|
|
.BR malloc (3).
|
|
|
|
The parameter \fBargv\fR contains the command split into
|
|
words using the same settings as the input \fIws\fR structure, with
|
|
command substitution disabled.
|
|
|
|
The \fIclos\fR parameter supplies user-specific data (see the
|
|
description of \fIws_closure\fR member).
|
|
.PP
|
|
The following two members are consulted if the \fBWRDSO_PARAMV\fR
|
|
option is set. They provide an array of positional parameters.
|
|
.TP
|
|
.BI "char const **" ws_paramv
|
|
Positional parameters. These are accessible in the input string using
|
|
the notation \fB$\fIN\fR or \fB${\fIN\fB}\fR, where \fIN\fR is the
|
|
0-based parameter number.
|
|
.TP
|
|
.BI "size_t " ws_paramc
|
|
Number of positional parameters.
|
|
.SH FLAGS
|
|
The following macros are defined for use in the \fBflags\fR argument.
|
|
.TP
|
|
.B WRDSF_DEFFLAGS
|
|
Default flags. This is a shortcut for:
|
|
|
|
\fB(WRDSF_NOVAR |\
|
|
WRDSF_NOCMD |\
|
|
WRDSF_QUOTE |\
|
|
WRDSF_SQUEEZE_DELIMS |\
|
|
WRDSF_CESCAPES)\fR,
|
|
|
|
i.e.: disable variable expansion and command substitution, perform quote
|
|
removal, treat any number of consecutive delimiters as a single
|
|
delimiter, replace \fBC\fR escapes appearing in the input string with
|
|
the corresponding characters.
|
|
.TP
|
|
.B WRDSF_APPEND
|
|
Append the resulting words to the array left from a previous call to
|
|
\fBwordsplit\fR.
|
|
.TP
|
|
.B WRDSF_DOOFFS
|
|
Insert
|
|
.I ws_offs
|
|
initial
|
|
.BR NULL s
|
|
in the array
|
|
.IR ws_wordv .
|
|
These are not counted in the returned
|
|
.IR ws_wordc .
|
|
.TP
|
|
.B WRDSF_NOCMD
|
|
Don't do command substitution. The \fBWRDSO_NOCMDSPLIT\fR option set
|
|
together with this flag prevents splitting command invocations
|
|
into separate words (see the \fBOPTIONS\fR section).
|
|
.TP
|
|
.B WRDSF_REUSE
|
|
The parameter \fIws\fR resulted from a previous call to
|
|
\fBwordsplit\fR, and \fBwordsplit_free\fR was not called. Reuse the
|
|
allocated storage.
|
|
.TP
|
|
.B WRDSF_SHOWERR
|
|
Print errors using
|
|
.BR ws_error .
|
|
.TP
|
|
.B WRDSF_UNDEF
|
|
Consider it an error if an undefined variable is expanded.
|
|
.TP
|
|
.B WRDSF_NOVAR
|
|
Don't do variable expansion. The \fBWRDSO_NOVARSPLIT\fR option set
|
|
together with this flag prevents variable references from being split
|
|
into separate words (see the \fBOPTIONS\fR section).
|
|
.TP
|
|
.B WRDSF_ENOMEMABRT
|
|
Abort on
|
|
.B ENOMEM
|
|
error. By default, out of memory errors are treated as any other
|
|
errors: the error is reported using \fIws_error\fR if the
|
|
.B WRDSF_SHOWERR
|
|
flag is set, and error code is returned. If this flag is set, the
|
|
.B ws_alloc_die
|
|
function is called instead. This function is not supposed to return.
|
|
.TP
|
|
.B WRDSF_WS
|
|
Trim off any leading and trailing whitespace from the returned
|
|
words. This flag is useful if the \fIws_delim\fR member does not
|
|
contain whitespace characters.
|
|
.TP
|
|
.B WRDSF_SQUOTE
|
|
Handle single quotes.
|
|
.TP
|
|
.B WRDSF_DQUOTE
|
|
Handle double quotes.
|
|
.TP
|
|
.B WRDSF_QUOTE
|
|
A shortcut for \fB(WRDSF_SQUOTE|WRDSF_DQUOTE)\fR.
|
|
.TP
|
|
.B WRDSF_SQUEEZE_DELIMS
|
|
Replace each input sequence of repeated delimiters with a single
|
|
delimiter.
|
|
.TP
|
|
.B WRDSF_RETURN_DELIMS
|
|
Return delimiters.
|
|
.TP
|
|
.B WRDSF_SED_EXPR
|
|
Treat
|
|
.BR sed (1)
|
|
expressions as words.
|
|
.TP
|
|
.B WRDSF_DELIM
|
|
.I ws_delim
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_COMMENT
|
|
.I ws_comment
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_ALLOC_DIE
|
|
.I ws_alloc_die
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_ERROR
|
|
.I ws_error
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_DEBUG
|
|
.I ws_debug
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_ENV
|
|
.I ws_env
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_GETVAR
|
|
\fIws_getvar\fR member is initialized.
|
|
.TP
|
|
.B WRDSF_SHOWDBG
|
|
Enable debugging.
|
|
.TP
|
|
.B WRDSF_NOSPLIT
|
|
Don't split input into words. This flag is useful for side
|
|
effects, e.g. to perform variable expansion within a string.
|
|
.TP
|
|
.B WRDSF_KEEPUNDEF
|
|
Keep undefined variables in place, instead of expanding them to
|
|
empty strings.
|
|
.TP
|
|
.B WRDSF_WARNUNDEF
|
|
Warn about undefined variables.
|
|
.TP
|
|
.B WRDSF_CESCAPES
|
|
Handle \fBC\fR-style escapes in the input string.
|
|
.TP
|
|
.B WRDSF_CLOSURE
|
|
.I ws_closure
|
|
is set.
|
|
.TP
|
|
.B WRDSF_ENV_KV
|
|
Each two consecutive elements in the
|
|
.I ws_env
|
|
array describe a single variable:
|
|
.IR ws_env [ n ]
|
|
contains variable name, and
|
|
.IR ws_env [ "n+1" ]
|
|
contains its value.
|
|
.TP
|
|
.B WRDSF_ESCAPE
|
|
.I ws_escape
|
|
is set.
|
|
.TP
|
|
.B WRDSF_INCREMENTAL
|
|
Incremental mode. Each subsequent call to \fBwordsplit\fR with
|
|
\fBNULL\fR as its first argument parses the next word from the input.
|
|
See the section
|
|
.B INCREMENTAL MODE
|
|
for a detailed discussion.
|
|
.TP
|
|
.B WRDSF_PATHEXPAND
|
|
Perform pathname and tilde expansion. See the
|
|
subsection
|
|
.B "Pathname expansion"
|
|
for details.
|
|
.TP
|
|
.B WRDSF_OPTIONS
|
|
The
|
|
.I ws_options
|
|
member is initialized.
|
|
.SH OPTIONS
|
|
The
|
|
.I ws_options
|
|
member is consulted if the
|
|
.B WRDSF_OPTIONS
|
|
flag is set. It contains a bitwise \fBOR\fR of one or more of the
|
|
following options:
|
|
.TP
|
|
.B WRDSO_NULLGLOB
|
|
Remove the words that produce empty string after pathname expansion.
|
|
.TP
|
|
.B WRDSO_FAILGLOB
|
|
Output error message if pathname expansion produces empty string.
|
|
.TP
|
|
.B WRDSO_DOTGLOB
|
|
During pathname expansion allow a leading period to be matched by
|
|
metacharacters.
|
|
.PP
|
|
.TP
|
|
.B WRDSO_BSKEEP_WORD
|
|
Backslash interpretation: when an unrecognized escape sequence is
|
|
encountered in a word, preserve it on output. If that bit is not set,
|
|
the backslash is removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_WORD
|
|
Backslash interpretation: handle octal escapes in words.
|
|
.TP
|
|
.B WRDSO_XESC_WORD
|
|
Backslash interpretation: handle hex escapes in words.
|
|
.TP
|
|
.B WRDSO_BSKEEP_QUOTE
|
|
Backslash interpretation: when an unrecognized escape sequence is
|
|
encountered in a doubly-quoted string, preserve it on output. If that
|
|
bit is not set, the backslash is removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_QUOTE
|
|
Backslash interpretation: handle octal escapes in doubly-quoted strings.
|
|
.TP
|
|
.B WRDSO_XESC_QUOTE
|
|
Backslash interpretation: handle hex escapes in doubly-quoted strings.
|
|
.TP
|
|
.B WRDSO_MAXWORDS
|
|
The \fBws_maxwords\fR member is initialized. This is used to control
|
|
the number of words returned by a call to \fBwordsplit\fR. For a
|
|
detailed discussion, refer to the chapter
|
|
.BR "LIMITING THE NUMBER OF WORDS" .
|
|
.TP
|
|
.B WRDSO_NOVARSPLIT
|
|
When \fBWRDSF_NOVAR\fR is set, don't split variable references, even
|
|
if they contain whitespace. E.g.
|
|
.B ${VAR:-foo bar}
|
|
will be treated as a single word.
|
|
.TP
|
|
.B WRDSO_NOCMDSPLIT
|
|
When \fBWRDSF_NOCMD\fR is set, don't split whatever looks like command
|
|
invocation, even if it contains whitespace. E.g.
|
|
.B $(command arg)
|
|
will be treated as a single word.
|
|
.TP
|
|
.B WRDSO_PARAMV
|
|
Positional arguments are supplied in
|
|
.I ws_paramv
|
|
and
|
|
.IR ws_paramc .
|
|
See the subsection
|
|
.B Positional argument expansion
|
|
for a discussion.
|
|
.TP
|
|
.B WRDSO_PARAM_NEGIDX
|
|
Used together with \fBWRDSO_PARAMV\fR, this allows for negative
|
|
positional argument references. A negative argument reference has the
|
|
form \fB${-\fIN\fB}\fR. It is expanded to the value of the argument
|
|
with index \fB\fIws_paramc\fR \- \fIN\fR, i.e. \fIN\fRth if counting
|
|
from the end.
|
|
.TP
|
|
.B WRDSO_NAMECHAR
|
|
When set, indicates that the
|
|
.B ws_namechar
|
|
member of the
|
|
.B wordsplit_t
|
|
struct has been initialized.
|
|
.sp
|
|
This member allows you to modify the notion of what characters can be
|
|
part of a valid variable name. See the chapter
|
|
.BR "VARIABLE NAMES" ,
|
|
for a detailed discussion.
|
|
.SH "ERROR CODES"
|
|
.TP
|
|
.BR WRDSE_OK ", " WRDSE_EOF
|
|
Successful return.
|
|
.TP
|
|
.B WRDSE_QUOTE
|
|
Missing closing quote. The \fIws_endp\fR member points to the position in
|
|
the input string where the error occurred.
|
|
.TP
|
|
.B WRDSE_NOSPACE
|
|
Memory exhausted.
|
|
.TP
|
|
.B WRDSE_USAGE
|
|
Invalid wordsplit usage.
|
|
.TP
|
|
.B WRDSE_CBRACE
|
|
Unbalanced curly brace.
|
|
.TP
|
|
.B WRDSE_UNDEF
|
|
Undefined variable. This error is returned only if the
|
|
\fBWRDSF_UNDEF\fR flag is set.
|
|
.TP
|
|
.B WRDSE_NOINPUT
|
|
Input exhausted. This is not actually an error. This code is returned
|
|
if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental
|
|
mode and encounters end of input string. See the section
|
|
.BR "INCREMENTAL MODE" .
|
|
.TP
|
|
.B WRDSE_PAREN
|
|
Unbalanced parenthesis.
|
|
.TP
|
|
.B WRDSE_GLOBERR
|
|
An error occurred during pattern matching.
|
|
.TP
|
|
.B WRDSE_USERERR
|
|
User-defined error. Normally this error is returned by \fBws_getvar\fR or
|
|
\fBws_command\fR. Use the function
|
|
.B wordsplit_strerror
|
|
to get a textual description of the error.
|
|
.SH "RETURN VALUE"
|
|
Both
|
|
.B wordsplit
|
|
and
|
|
.B wordsplit_len
|
|
return \fB0\fR on success, and a non-zero error code on
|
|
error (see the section
|
|
.BR "ERROR CODES" ).
|
|
.PP
|
|
.B wordsplit_strerror
|
|
returns a pointer to the constant string describing the last error
|
|
condition that occurred in
|
|
.IR ws .
|
|
.SH EXAMPLE
|
|
The short program below implements a function that parses the
|
|
input string similarly to the shell. All expansions are performed.
|
|
Default error reporting is used.
|
|
.PP
|
|
.EX
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <wordsplit.h>
|
|
|
|
/* Run command from \fIstr\fR (\fIlen\fR bytes long) and store its
|
|
output in \fIret\fR.
|
|
\fIargv\fR and \fIclosure\fR are not used.
|
|
Return wordsplit error code.
|
|
*/
|
|
static int runcmd(char **ret, const char *str, size_t len,
|
|
char **argv, void *closure)
|
|
{
|
|
FILE *fp;
|
|
char *cmd;
|
|
int c, lastc;
|
|
char *buffer = NULL;
|
|
size_t bufsize = 0;
|
|
size_t buflen = 0;
|
|
|
|
/* Convert to a null-terminated string for \fBpopen\fR(3) */
|
|
cmd = malloc(len + 1);
|
|
if (!cmd)
|
|
return WRDSE_NOSPACE;
|
|
memcpy(cmd, str, len);
|
|
cmd[len] = 0;
|
|
|
|
fp = popen(cmd, "r");
|
|
if (!fp) {
|
|
char buf[128];
|
|
|
|
snprintf(buf, sizeof buf, "can't run %s: %s",
|
|
cmd, strerror(errno));
|
|
*ret = strdup(buf);
|
|
if (!*ret)
|
|
return WRDSE_NOSPACE;
|
|
else
|
|
return WRDSE_USERERR;
|
|
}
|
|
|
|
/* Collect the output, reallocating \fIbuffer\fR as needed. */
|
|
while ((c = fgetc(fp)) != EOF) {
|
|
lastc = c;
|
|
if (c == '\n')
|
|
c = ' ';
|
|
if (buflen == bufsize) {
|
|
char *p;
|
|
|
|
if (bufsize == 0)
|
|
bufsize = 80;
|
|
else
|
|
bufsize *= 2;
|
|
p = realloc(buffer, bufsize);
|
|
if (!p) {
|
|
free(buffer);
|
|
free(cmd);
|
|
return WRDSE_NOSPACE;
|
|
}
|
|
buffer = p;
|
|
}
|
|
buffer[buflen++] = c;
|
|
}
|
|
|
|
/* Trim off the trailing newline */
|
|
if (buffer) {
|
|
if (lastc == '\n')
|
|
--buflen;
|
|
buffer[buflen] = 0;
|
|
}
|
|
|
|
pclose(fp);
|
|
free(cmd);
|
|
|
|
/* Return the composed string. */
|
|
*ret = buffer;
|
|
return WRDSE_OK;
|
|
}
|
|
|
|
extern char **environ;
|
|
|
|
/* Parse \fIs\fR much as shell does. Return array of words on
|
|
success, and NULL on error.
|
|
*/
|
|
char **shell_parse(char *s)
|
|
{
|
|
wordsplit_t ws;
|
|
size_t wc;
|
|
char **wv;
|
|
int rc;
|
|
|
|
/* Initialize \fIws\fR */
|
|
ws.ws_env = (const char **) environ;
|
|
ws.ws_command = runcmd;
|
|
/* Call \fBwordsplit\fR. Let it report errors, if any. */
|
|
rc = wordsplit(s, &ws,
|
|
WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_PATHEXPAND
|
|
| WRDSF_SHOWERR);
|
|
if (rc == WRDSE_OK)
|
|
/* Store away the resulting words on success. */
|
|
wordsplit_getwords(&ws, &wc, &wv);
|
|
else
|
|
wv = NULL;
|
|
wordsplit_free(&ws);
|
|
return wv;
|
|
}
|
|
.EE
|
|
.SH AUTHORS
|
|
Sergey Poznyakoff
|
|
.SH BUGS
|
|
Backtick command expansion is not supported.
|
|
.SH "BUG REPORTS"
|
|
Report bugs to <gray@gnu.org>.
|
|
.SH COPYRIGHT
|
|
Copyright \(co 2009\(en2025 Sergey Poznyakoff
|
|
.br
|
|
.na
|
|
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
|
.br
|
|
.ad
|
|
This is free software: you are free to change and redistribute it.
|
|
There is NO WARRANTY, to the extent permitted by law.
|
|
.\" Local variables:
|
|
.\" eval: (add-hook 'write-file-hooks 'time-stamp)
|
|
.\" time-stamp-start: ".TH [A-Z_][A-Z0-9_]* [0-9] \""
|
|
.\" time-stamp-format: "%:B %:d, %:y"
|
|
.\" time-stamp-end: "\""
|
|
.\" time-stamp-line-limit: 20
|
|
.\" end:
|
|
|