mirror of
git://git.gnu.org.ua/wordsplit.git
synced 2025-04-26 00:29:54 +03:00
991 lines
29 KiB
Groff
991 lines
29 KiB
Groff
.\" This file is part of grecs -*- nroff -*-
|
|
.\" Copyright (C) 2007-2016 Sergey Poznyakoff
|
|
.\"
|
|
.\" Grecs is free software; you can redistribute it and/or modify
|
|
.\" it under the terms of the GNU General Public License as published by
|
|
.\" the Free Software Foundation; either version 3, or (at your option)
|
|
.\" any later version.
|
|
.\"
|
|
.\" Grecs is distributed in the hope that it will be useful,
|
|
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
.\" GNU General Public License for more details.
|
|
.\"
|
|
.\" You should have received a copy of the GNU General Public License
|
|
.\" along with Grecs. If not, see <http://www.gnu.org/licenses/>.
|
|
.\"
|
|
.TH WORDSPLIT 3 "December 3, 2014" "GRECS" "Grecs User Reference"
|
|
.SH NAME
|
|
wordsplit \- split string into words
|
|
.SH SYNOPSIS
|
|
.B #include <wordsplit.h>
|
|
.sp
|
|
\fBint wordsplit (const char *\fIs\fB,\
|
|
wordsplit_t *\fIws\fB, int \fIflags\fB);\fR
|
|
.sp
|
|
\fBint wordsplit_len (const char *\fIs\fB,\
|
|
\fBsize_t \fIlen\fR,\
|
|
\fBwordsplit_t *\fIp\fB,\
|
|
int \fIflags\fB);
|
|
.sp
|
|
\fBvoid wordsplit_free (wordsplit_t *\fIp\fB);\fR
|
|
.sp
|
|
\fBvoid wordsplit_free_words (wordsplit_t *\fIws\fB);\fR
|
|
.sp
|
|
\fBvoid wordsplit_getwords (wordsplit_t *\fIws\fB,\
|
|
int *\fIwordc\fB, char ***\fIwordv\fB);
|
|
.sp
|
|
\fBvoid wordsplit_perror (wordsplit_t *\fIws\fB);\fR
|
|
.sp
|
|
\fBconst char *wordsplit_strerror (wordsplit_t *\fIws\fB);\fR
|
|
.sp
|
|
\fBvoid wordsplit_clearerr (wordsplit_t *\fIws\fB);\fR
|
|
.SH DESCRIPTION
|
|
The function \fBwordsplit\fR splits the string \fIs\fR into words
|
|
using a set of rules governed by \fIflags\fR. Depending on
|
|
\fIflags\fR, the function performs the following operations:
|
|
whitespace trimming, tilde expansion, variable expansion, quote
|
|
removal, command substitution, and path expansion. On success,
|
|
\fBwordsplit\fR returns 0 and stores the words found in the member
|
|
\fBws_wordv\fR and the number of words in the member \fBws_wordc\fR.
|
|
On error, a non-zero error code is returned.
|
|
.PP
|
|
The function \fBwordsplit_len\fR acts similarly, except that it
|
|
accesses only the first \fIlen\fR bytes of the string \fIs\fR, which is
|
|
not required to be null-terminated.
|
|
.PP
|
|
When no longer needed, the resources allocated by a call to one of
|
|
these functions must be freed using
|
|
.BR wordsplit_free .
|
|
.PP
|
|
The function
|
|
.B wordsplit_free_words
|
|
frees only the memory allocated for elements of
|
|
.I ws_wordv
|
|
and initializes
|
|
.I ws_wordc
|
|
to zero.
|
|
.PP
|
|
The usual calling sequence is:
|
|
.PP
|
|
.EX
|
|
wordsplit_t ws;
|
|
int rc;
|
|
|
|
if (wordsplit(s, &ws, WRDSF_DEFFLAGS)) {
|
|
wordsplit_perror(&ws);
|
|
return;
|
|
}
|
|
for (i = 0; i < ws.ws_wordc; i++) {
|
|
/* do something with ws.ws_wordv[i] */
|
|
}
|
|
wordsplit_free(&ws);
|
|
.EE
|
|
.PP
|
|
The function
|
|
.B wordsplit_getwords
|
|
returns in \fIwordv\fR an array of words, and in \fIwordc\fR the number
|
|
of elements in \fIwordv\fR. The array can be used after calling
|
|
.BR wordsplit_free .
|
|
The caller becomes responsible for freeing the memory allocated for
|
|
each element of the array and the array pointer itself.
|
|
.PP
|
|
The function
|
|
.B wordsplit_perror
|
|
prints error message from the last invocation of \fBwordsplit\fR. It
|
|
uses the function pointed to by the
|
|
.I ws_error
|
|
member. By default, it outputs the message on the standard error.
|
|
.PP
|
|
For more sophisticated error reporting, the function
|
|
.B wordsplit_strerror
|
|
can be used. It returns a pointer to the string describing the error.
|
|
The caller should treat this pointer as a constant string. It should
|
|
not try to alter or deallocate it.
|
|
.PP
|
|
The function
|
|
.B wordsplit_clearerr
|
|
clears the error condition associated with \fIws\fR.
|
|
.SH INCREMENTAL MODE
|
|
In incremental mode \fBwordsplit\fR parses one word per invocation.
|
|
It returns \fBWRDSE_OK\fR on success and \fBWRDSE_NOINPUT\fR when
|
|
the entire input string has been processed.
|
|
.PP
|
|
This mode is enabled if the flag \fBWRDSF_INCREMENTAL\fR is set in
|
|
the \fIflags\fR argument. Subsequent calls to \fBwordsplit\fR must
|
|
have \fBNULL\fR as first argument. Each successful
|
|
call will return exactly one word in \fBws.ws_wordv[0]\fR.
|
|
.PP
|
|
An example usage:
|
|
.PP
|
|
.EX
|
|
wordsplit_t ws;
|
|
int rc;
|
|
flags = WRDSF_DEFFLAGS|WRDSF_INCREMENTAL;
|
|
|
|
for (rc = wordsplit(s, &ws, flags); rc == WRDSE_OK;
|
|
rc = wordsplit(NULL, &ws, flags)) {
|
|
process(ws.ws_wordv[0]);
|
|
}
|
|
|
|
if (rc != WRDSE_NOINPUT)
|
|
wordsplit_perror(&ws);
|
|
|
|
wordsplit_free(&ws);
|
|
.EE
|
|
.SH EXPANSION
|
|
Expansion is performed on the input after it has been split into
|
|
words. There are several kinds of expansion; which of them are
|
|
performed is controlled by appropriate bits set in the \fIflags\fR
|
|
argument. Whatever expansion kinds are enabled, they are always run
|
|
in the same order as described in this section.
|
|
.SS Whitespace trimming
|
|
Whitespace trimming removes any leading and trailing whitespace from
|
|
the initial word array. It is enabled by the
|
|
.B WRDSF_WS
|
|
flag. Whitespace trimming is needed only if you redefine
|
|
word delimiters (\fIws_delim\fR member) so that they don't contain
|
|
whitespace characters (\fB\(dq \\t\\n\(dq\fR).
|
|
.SS Tilde expansion
|
|
Tilde expansion is enabled if the
|
|
.B WRDSF_PATHEXPAND
|
|
bit is set. It expands all words that begin with an unquoted tilde
|
|
character (`\fB~\fR'). If tilde is followed immediately by a slash,
|
|
it is replaced with the home directory of the current user (as
|
|
determined by his \fBpasswd\fR entry). A tilde alone is handled the
|
|
same way. Otherwise, the characters between the tilde and first slash
|
|
character (or end of string, if it doesn't contain any) are treated as
|
|
a login name, and are replaced (along with the tilde itself) with the
|
|
home directory of that user. If there is no user with such login
|
|
name, the word is left unchanged.
|
|
.SS Variable expansion
|
|
Variable expansion replaces each occurrence of
|
|
.BI $ NAME
|
|
or
|
|
.BI ${ NAME }
|
|
with the value of the variable \fINAME\fR. It is enabled if the
|
|
flag \fBWRDSF_NOVAR\fR is not set. The caller is responsible for
|
|
supplying the table of available variables. Two mechanisms are
|
|
provided: environment array and a callback function.
|
|
.PP
|
|
Environment array is a \fBNULL\fR-terminated array of variables,
|
|
stored in the \fIws_env\fR member. The \fBWRDSF_ENV\fR flag must be
|
|
set in order to instruct \fBwordsplit\fR to use this array.
|
|
.PP
|
|
By default, elements of the \fIws_env\fR array have the form
|
|
.IR NAME = VALUE .
|
|
An alternative format is enabled by the
|
|
.B WRDSF_ENV_KV
|
|
flag. When it is set, each variable is described by two consecutive
|
|
elements in the array:
|
|
.IR ws_env [ n ]
|
|
containing the variable name, and
|
|
.IR ws_env [ "n+1" ]
|
|
containing its value.
|
|
.PP
|
|
More sophisticated variable tables can be implemented using
|
|
callback function. The \fIws_getvar\fR member should be set to point
|
|
to that function and \fBWRDSF_GETVAR\fR flag must be set. The
|
|
function itself shall be defined as
|
|
.PP
|
|
.EX
|
|
int getvar (char **ret, const char *var, size_t len, void *clos);
|
|
.EE
|
|
.PP
|
|
The function shall look up the variable identified by the first
|
|
\fIlen\fR bytes of the string \fIvar\fR. If such variable is found,
|
|
the function shall store a copy of its value (allocated using
|
|
\fBmalloc\fR(3)) in the memory location pointed to by \fBret\fR, and
|
|
return \fBWRDSE_OK\fR. If the variable is not found, the function shall
|
|
return \fBWRDSE_UNDEF\fR. Otherwise, a non-zero error code shall be
|
|
returned.
|
|
.PP
|
|
If \fIws_getvar\fR returns
|
|
.BR WRDSE_USERERR ,
|
|
it must store the pointer to the error description string in
|
|
.BR *ret .
|
|
In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
|
|
data returned in \fBret\fR must be allocated using
|
|
.BR malloc (3).
|
|
.PP
|
|
If both
|
|
.I ws_env
|
|
and
|
|
.I ws_getvar
|
|
are used, the variable is first looked up in
|
|
.IR ws_env ,
|
|
and if not found there, the
|
|
.I ws_getvar
|
|
function is called.
|
|
.PP
|
|
During variable expansion, the forms below cause
|
|
.B wordsplit
|
|
to test for a variable that is unset or null. Omitting the
|
|
colon results in a test only for a variable that is unset.
|
|
.TP
|
|
.BI ${ variable :- word }
|
|
.BR "Use Default Values" .
|
|
If \fIvariable\fR is unset or null, the expansion of \fIword\fR is substituted.
|
|
Otherwise, the value of \fIvariable\fR is substituted.
|
|
.TP
|
|
.BI ${ variable := word }
|
|
.BR "Assign Default Values" .
|
|
If \fIvariable\fR is unset or null, the expansion of \fIword\fR is
|
|
assigned to \fIvariable\fR. The value of \fIvariable\fR is then substituted.
|
|
.TP
|
|
.BI ${ variable :? word }
|
|
.BR "Display Error if Null or Unset" .
|
|
If \fIvariable\fR is null or unset, the expansion of \fIword\fR (or a
|
|
message to that effect if word is not present) is output using
|
|
.IR ws_error .
|
|
Otherwise, the value of \fIvariable\fR is substituted.
|
|
.TP
|
|
.BI ${ variable :+ word }
|
|
.BR "Use Alternate Value" .
|
|
If \fIvariable\fR is null or unset, nothing is substituted, otherwise the
|
|
expansion of \fIword\fR is substituted.
|
|
.SS Quote removal
|
|
Quote removal translates unquoted escape sequences into corresponding bytes.
|
|
An escape sequence is a backslash followed by one or more characters. By
|
|
default, each sequence \fB\\\fIC\fR appearing in unquoted words is
|
|
replaced with the character \fIC\fR. In doubly-quoted strings, two
|
|
backslash sequences are recognized: \fB\\\\\fR translates to a single
|
|
backslash, and \fB\\\(dq\fR translates to a double-quote.
|
|
.PP
|
|
Two flags are provided to modify this behavior. If
|
|
.I WRDSF_CESCAPES
|
|
flag is set, the following escape sequences are recognized:
|
|
.sp
|
|
.nf
|
|
.ta 8n 18n 42n
|
|
.ul
|
|
Sequence Expansion ASCII
|
|
\fB\\\\\fR \fB\\\fR 134
|
|
\fB\\\(dq\fR \fB\(dq\fR 042
|
|
\fB\\a\fR audible bell 007
|
|
\fB\\b\fR backspace 010
|
|
\fB\\f\fR form-feed 014
|
|
\fB\\n\fR new line 012
|
|
\fB\\r\fR carriage return 015
|
|
\fB\\t\fR horizontal tabulation 011
|
|
\fB\\v\fR vertical tabulation 013
|
|
.fi
|
|
.sp
|
|
The sequence \fB\\x\fINN\fR or \fB\\X\fINN\fR, where \fINN\fR stands
|
|
for a two-digit hex number is replaced with ASCII character \fINN\fR.
|
|
The sequence \fB\\0\fINNN\fR, where \fINNN\fR stands for a three-digit
|
|
octal number is replaced with ASCII character whose code is \fINNN\fR.
|
|
.PP
|
|
The \fBWRDSF_ESCAPE\fR flag allows the caller to customize escape
|
|
sequences. If it is set, the \fBws_escape\fR member must be
|
|
initialized. This member provides escape tables for unquoted words
|
|
(\fBws_escape[0]\fR) and quoted strings (\fBws_escape[1]\fR). Each
|
|
table is a string consisting of even number of characters. In each
|
|
pair of characters, the first one is a character that can appear after
|
|
backslash, and the following one is its translation. For example, the
|
|
above table of C escapes is represented as
|
|
\fB\(dqa\\ab\\bf\\fn\\nr\\rt\\tv\\v\(dq\fR.
|
|
.PP
|
|
It is valid to initialize \fBws_escape\fR elements to zero. In this
|
|
case, no backslash translation occurs.
|
|
.PP
|
|
The handling of octal and hex escapes is controlled by the following
|
|
bits in \fBws_options\fR:
|
|
.TP
|
|
.B WRDSO_BSKEEP_WORD
|
|
When an unrecognized escape sequence is encountered in a word,
|
|
preserve it on output. If that bit is not set, the backslash is
|
|
removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_WORD
|
|
Handle octal escapes in words.
|
|
.TP
|
|
.B WRDSO_XESC_WORD
|
|
Handle hex escapes in words.
|
|
.TP
|
|
.B WRDSO_BSKEEP_QUOTE
|
|
When an unrecognized escape sequence is encountered in a doubly-quoted
|
|
string, preserve it on output. If that bit is not set, the backslash is
|
|
removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_QUOTE
|
|
Handle octal escapes in doubly-quoted strings.
|
|
.TP
|
|
.B WRDSO_XESC_QUOTE
|
|
Handle hex escapes in doubly-quoted strings.
|
|
.SS Command substitution
|
|
During \fIcommand substitution\fR, each word is scanned for commands.
|
|
Each command found is executed and replaced by the output it creates.
|
|
.PP
|
|
The syntax is:
|
|
.PP
|
|
.RS +4
|
|
.BI $( command )
|
|
.RE
|
|
.PP
|
|
Command substitutions may be nested.
|
|
.PP
|
|
Unless the substitution appears within double quotes, word splitting and
|
|
pathname expansion are performed on its result.
|
|
.PP
|
|
To enable command substitution, the caller must initialize the
|
|
.I ws_command
|
|
member with the address of the substitution function and make sure the
|
|
.B WRDSF_NOCMD
|
|
flag is not set.
|
|
.PP
|
|
The substitution function should be defined as follows:
|
|
.PP
|
|
.RS +4
|
|
\fBint \fIcommand\fB\
|
|
(char **\fIret\fB,\
|
|
const char *\fIcmd\fB,\
|
|
size_t \fIlen,\fB\
|
|
char **\fIargv\fB,\
|
|
void *\fIclos\fB);\fR
|
|
.RE
|
|
.PP
|
|
First \fIlen\fR bytes of \fIcmd\fR contain the command invocation as
|
|
it appeared between
|
|
.BR $( and ),
|
|
with all expansions performed. If the
|
|
.I WRDSO_ARGV
|
|
option is set, the parameter \fIargv\fR contains the command line split into
|
|
words using the same settings as the input \fIws\fR structure.
|
|
Otherwise, \fIargv\fR is \fBNULL\fR.
|
|
.PP
|
|
The \fIclos\fR parameter supplies user-specific data, passed in the
|
|
\fIws_closure\fR member.
|
|
.PP
|
|
On success, the function stores a pointer to the
|
|
output string in the memory location pointed to by \fIret\fR and
|
|
returns \fBWRDSE_OK\fR (\fB0\fR). On error, it must return one of the
|
|
error codes described in the section
|
|
.BR "ERROR CODES" .
|
|
If
|
|
.B WRDSE_USERERR
|
|
is returned, a pointer to the error description string must be stored in
|
|
.BR *ret .
|
|
.PP
|
|
When \fBWRDSE_OK\fR or \fBWRDSE_USERERR\fR is returned, the
|
|
data stored in \fB*ret\fR must be allocated using
|
|
.BR malloc (3).
|
|
.SS Pathname expansion
|
|
Pathname expansion is performed if the \fBWRDSF_PATHEXPAND\fR flag is
|
|
set. Each unquoted word is scanned for characters
|
|
.BR * , ? ", and " [ .
|
|
If one of these appears, the word is considered a \fIpattern\fR (in
|
|
the sense of
|
|
.BR glob (3))
|
|
and is replaced with an alphabetically sorted list of file names matching the
|
|
pattern.
|
|
.PP
|
|
If no matches are found for a word
|
|
and the \fIws_options\fR member has the
|
|
.B WRDSO_NULLGLOB
|
|
bit set, the word is removed.
|
|
.PP
|
|
If the \fBWRDSO_FAILGLOB\fR option is set, an error message is output
|
|
for each such word using
|
|
.IR ws_error .
|
|
.PP
|
|
When matching a pattern, the dot at the start of a name or immediately
|
|
following a slash must be matched explicitly, unless
|
|
the \fBWRDSO_DOTGLOB\fR option is set.
|
|
.SH WORDSPLIT_T STRUCTURE
|
|
The data type \fBwordsplit_t\fR has three members that contain
|
|
output data upon return from \fBwordsplit\fR or \fBwordsplit_len\fR,
|
|
and a number of members that the caller can initialize on input in
|
|
order to customize the function behavior. Each such member has a
|
|
corresponding flag bit, which must be set in the \fIflags\fR argument
|
|
in order to instruct the \fBwordsplit\fR function to use it.
|
|
.SS OUTPUT
|
|
.TP
|
|
.BI size_t " ws_wordc"
|
|
Number of words in \fIws_wordv\fR. Accessible upon successful return
|
|
from \fBwordsplit\fR.
|
|
.TP
|
|
.BI "char ** " ws_wordv
|
|
Array of resulting words. Accessible upon successful return
|
|
from \fBwordsplit\fR.
|
|
.TP
|
|
.BI "int " ws_errno
|
|
Error code, if the invocation of \fBwordsplit\fR or
|
|
\fBwordsplit_len\fR failed. This is the same value as returned from
|
|
the function in that case.
|
|
.PP
|
|
The caller should not attempt to free or reallocate \fIws_wordv\fR or
|
|
any elements thereof, nor to modify \fIws_wordc\fR.
|
|
.PP
|
|
To store away the words for use after freeing \fIws\fR with
|
|
.BR wordsplit_free ,
|
|
the caller should use
|
|
.BR wordsplit_getwords .
|
|
It is more effective than copying the contents of
|
|
.I ws_wordv
|
|
manually.
|
|
.SS INPUT
|
|
.TP
|
|
.BI "size_t " ws_offs
|
|
If the
|
|
.B WRDSF_DOOFFS
|
|
flag is set, this member specifies the number of initial elements in
|
|
.I ws_wordv
|
|
to fill with NULLs. These elements are not counted in the returned
|
|
.IR ws_wordc .
|
|
.TP
|
|
.BI "int " ws_flags
|
|
Contains flags passed to wordsplit on entry. Can be used as a
|
|
read-only member when using \fBwordsplit\fR in incremental mode or
|
|
in a loop with
|
|
.B WRDSF_REUSE
|
|
flag set.
|
|
.TP
|
|
.BI "int " ws_options
|
|
Additional options used when
|
|
.B WRDSF_OPTIONS
|
|
is set.
|
|
.TP
|
|
.BI "const char *" ws_delim
|
|
Word delimiters. If initialized on input, the
|
|
.B WRDSF_DELIM
|
|
flag must be set. Otherwise, it is initialized on entry to
|
|
.B wordsplit
|
|
with the string \fB\(dq \\t\\n\(dq\fR.
|
|
.TP
|
|
.BI "const char *" ws_comment
|
|
A zero-terminated string of characters that begin an inline comment.
|
|
If initialized on input, the
|
|
.B WRDSF_COMMENT
|
|
flag must be set. By default, its value is \fB\(dq#\(dq\fR.
|
|
.TP
|
|
.BI "const char *" ws_escape [2]
|
|
Escape tables for unquoted words (\fBws_escape[0]\fR) and quoted
|
|
strings (\fBws_escape[1]\fR). These are used to translate escape
|
|
sequences (\fB\\\fIC\fR) into characters. Each table is a string
|
|
consisting of even number of characters. In each pair of characters,
|
|
the first one is a character that can appear after backslash, and the
|
|
following one is its representation. For example, the string
|
|
\fB\(dqt\\tn\\n\(dq\fR translates \fB\\t\fR into horizontal
|
|
tabulation character and \fB\\n\fR into newline.
|
|
.B WRDSF_ESCAPE
|
|
flag must be set if this member is initialized.
|
|
.TP
|
|
.BI "void (*" ws_alloc_die ") (wordsplit_t *)"
|
|
This function is called when
|
|
.B wordsplit
|
|
is unable to allocate memory and the
|
|
.B WRDSF_ENOMEMABRT
|
|
flag was set. The default function prints a
|
|
message on standard error and aborts. This member can be used
|
|
to customize error handling. If initialized, the
|
|
.B WRDSF_ALLOC_DIE
|
|
flag must be set.
|
|
.TP
|
|
.BI "void (*" ws_error ") (const char *, ...)"
|
|
Pointer to function used for error reporting. The invocation
|
|
convention is the same as for
|
|
.BR printf (3).
|
|
The default function formats and prints the message on the standard
|
|
error.
|
|
|
|
If this member is initialized, the
|
|
.B WRDSF_ERROR
|
|
flag must be set.
|
|
.TP
|
|
.BI "void (*" ws_debug ") (const char *, ...)"
|
|
Pointer to function used for debugging output. By default it points
|
|
to the same function as
|
|
.BR ws_error .
|
|
If initialized, the
|
|
.B WRDSF_DEBUG
|
|
flag must be set.
|
|
.TP
|
|
.BI "const char **" ws_env
|
|
A \fBNULL\fR-terminated array of environment variables. It is used
|
|
during variable expansion. If set, the
|
|
.B WRDSF_ENV
|
|
flag must be set. Variable expansion is enabled only if either
|
|
.B WRDSF_ENV
|
|
or
|
|
.B WRDSF_GETVAR
|
|
(see below) is set, and
|
|
.B WRDSF_NOVAR
|
|
flag is not set.
|
|
|
|
Each element of
|
|
.I ws_env
|
|
must have the form \fB\(dq\fINAME\fB=\fIVALUE\fB\(dq\fR, where \fINAME\fR is
|
|
the name of the variable, and \fIVALUE\fR is its value.
|
|
Alternatively, if the \fBWRDSF_ENV_KV\fR flag is set, each variable is
|
|
described by two elements of
|
|
.IR ws_env :
|
|
one containing variable name, and the next one with its
|
|
value.
|
|
.TP
|
|
.BI "int (*" ws_getvar ") (char **ret, const char *var, size_t len, void *clos)"
|
|
Points to the function that will be used during variable expansion to
|
|
look up the value of the environment variable named \fBvar\fR.
|
|
This function is used if the variable expansion is enabled (i.e. the
|
|
.B WRDSF_NOVAR
|
|
flag is not set), and the \fBWRDSF_GETVAR\fR flag is set.
|
|
|
|
If both
|
|
.B WRDSF_ENV
|
|
and
|
|
.B WRDSF_GETVAR
|
|
are set, the variable is first looked up in the
|
|
.I ws_env
|
|
array and, if not found there,
|
|
.I ws_getvar
|
|
is called.
|
|
|
|
The name of the variable is specified by the first \fIlen\fR bytes of
|
|
the string \fIvar\fR. The \fIclos\fR parameter supplies the
|
|
user-specific data (see below the description of \fIws_closure\fR
|
|
member) and the \fBret\fR parameter points to the memory location
|
|
where output data is to be stored. On success, the function must
|
|
store there a pointer to the string with the value of the variable and
|
|
return 0. On error, it must return one of the error codes described
|
|
in the section
|
|
.BR "ERROR CODES" .
|
|
If \fIws_getvar\fR returns
|
|
.BR WRDSE_USERERR ,
|
|
it must store the pointer to the error description string in
|
|
.BR *ret .
|
|
In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
|
|
data returned in \fBret\fR must be allocated using
|
|
.BR malloc (3).
|
|
.TP
|
|
.BI "void *" ws_closure
|
|
Additional user-specific data passed as the last argument to
|
|
.I ws_getvar
|
|
or
|
|
.I ws_command
|
|
(see below). If defined, the
|
|
.B WRDSF_CLOSURE
|
|
flag must be set.
|
|
.TP
|
|
\fBint (*\fIws_command\fB)\
|
|
(char **ret,\
|
|
const char *cmd,\
|
|
size_t len,\
|
|
char **argv,\
|
|
void *clos)\fR
|
|
Pointer to the function that performs command substitution. It treats
|
|
the first \fIlen\fR bytes of the string \fIcmd\fR as a command
|
|
(whatever it means for the caller) and attempts to execute it. On
|
|
success, a pointer to the string with the command output is stored
|
|
in the memory location pointed to by \fBret\fR and \fB0\fR is
|
|
returned. On error,
|
|
the function must return one of the error codes described in the section
|
|
.BR "ERROR CODES" .
|
|
If \fIws_command\fR returns
|
|
.BR WRDSE_USERERR ,
|
|
it must store the pointer to the error description string in
|
|
.BR *ret .
|
|
In any case (whether returning \fB0\fR or \fBWRDSE_USERERR\fR), the
|
|
data returned in \fBret\fR must be allocated using
|
|
.BR malloc (3).
|
|
|
|
If the
|
|
.I WRDSO_ARGV
|
|
option is set, the parameter \fBargv\fR contains the command split into
|
|
words using the same settings as the input \fIws\fR structure, with
|
|
command substitution disabled.
|
|
|
|
The \fIclos\fR parameter supplies user-specific data (see the
|
|
description of \fIws_closure\fR member).
|
|
.SH FLAGS
|
|
The following macros are defined for use in the \fBflags\fR argument.
|
|
.TP
|
|
.B WRDSF_DEFFLAGS
|
|
Default flags. This is a shortcut for:
|
|
|
|
\fB(WRDSF_NOVAR |\
|
|
WRDSF_NOCMD |\
|
|
WRDSF_QUOTE |\
|
|
WRDSF_SQUEEZE_DELIMS |\
|
|
WRDSF_CESCAPES)\fR,
|
|
|
|
i.e.: disable variable expansion and command substitution, perform quote
|
|
removal, treat any number of consecutive delimiters as a single
|
|
delimiter, replace \fBC\fR escapes appearing in the input string with
|
|
the corresponding characters.
|
|
.TP
|
|
.B WRDSF_APPEND
|
|
Append the words found to the array resulting from a previous call to
|
|
\fBwordsplit\fR.
|
|
.TP
|
|
.B WRDSF_DOOFFS
|
|
Insert
|
|
.I ws_offs
|
|
initial
|
|
.BR NULL s
|
|
in the array
|
|
.IR ws_wordv .
|
|
These are not counted in the returned
|
|
.IR ws_wordc .
|
|
.TP
|
|
.B WRDSF_NOCMD
|
|
Don't do command substitution.
|
|
.TP
|
|
.B WRDSF_REUSE
|
|
The parameter \fIws\fR resulted from a previous call to
|
|
\fBwordsplit\fR, and \fBwordsplit_free\fR was not called. Reuse the
|
|
allocated storage.
|
|
.TP
|
|
.B WRDSF_SHOWERR
|
|
Print errors using
|
|
.BR ws_error .
|
|
.TP
|
|
.B WRDSF_UNDEF
|
|
Consider it an error if an undefined variable is expanded.
|
|
.TP
|
|
.B WRDSF_NOVAR
|
|
Don't do variable expansion.
|
|
.TP
|
|
.B WRDSF_ENOMEMABRT
|
|
Abort on
|
|
.B ENOMEM
|
|
error. By default, out of memory errors are treated as any other
|
|
errors: the error is reported using \fIws_error\fR if the
|
|
.B WRDSF_SHOWERR
|
|
flag is set, and error code is returned. If this flag is set, the
|
|
.B ws_alloc_die
|
|
function is called instead. This function is not supposed to return.
|
|
.TP
|
|
.B WRDSF_WS
|
|
Trim off any leading and trailing whitespace from the returned
|
|
words. This flag is useful if the \fIws_delim\fR member does not
|
|
contain whitespace characters.
|
|
.TP
|
|
.B WRDSF_SQUOTE
|
|
Handle single quotes.
|
|
.TP
|
|
.B WRDSF_DQUOTE
|
|
Handle double quotes.
|
|
.TP
|
|
.B WRDSF_QUOTE
|
|
A shortcut for \fB(WRDSF_SQUOTE|WRDSF_DQUOTE)\fR.
|
|
.TP
|
|
.B WRDSF_SQUEEZE_DELIMS
|
|
Replace each input sequence of repeated delimiters with a single
|
|
delimiter.
|
|
.TP
|
|
.B WRDSF_RETURN_DELIMS
|
|
Return delimiters.
|
|
.TP
|
|
.B WRDSF_SED_EXPR
|
|
Treat
|
|
.BR sed (1)
expressions as words.
|
|
.TP
|
|
.B WRDSF_DELIM
|
|
.I ws_delim
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_COMMENT
|
|
.I ws_comment
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_ALLOC_DIE
|
|
.I ws_alloc_die
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_ERROR
|
|
.I ws_error
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_DEBUG
|
|
.I ws_debug
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_ENV
|
|
.I ws_env
|
|
member is initialized.
|
|
.TP
|
|
.B WRDSF_GETVAR
|
|
.I ws_getvar
member is initialized.
|
|
.TP
|
|
.B WRDSF_SHOWDBG
|
|
Enable debugging.
|
|
.TP
|
|
.B WRDSF_NOSPLIT
|
|
Don't split input into words. This flag is useful for side
|
|
effects, e.g. to perform variable expansion within a string.
|
|
.TP
|
|
.B WRDSF_KEEPUNDEF
|
|
Keep undefined variables in place, instead of expanding them to
|
|
empty strings.
|
|
.TP
|
|
.B WRDSF_WARNUNDEF
|
|
Warn about undefined variables.
|
|
.TP
|
|
.B WRDSF_CESCAPES
|
|
Handle \fBC\fR-style escapes in the input string.
|
|
.TP
|
|
.B WRDSF_CLOSURE
|
|
.I ws_closure
|
|
is set.
|
|
.TP
|
|
.B WRDSF_ENV_KV
|
|
Each two consecutive elements in the
|
|
.I ws_env
|
|
array describe a single variable:
|
|
.IR ws_env [ n ]
|
|
contains variable name, and
|
|
.IR ws_env [ "n+1" ]
|
|
contains its value.
|
|
.TP
|
|
.B WRDSF_ESCAPE
|
|
.I ws_escape
|
|
is set.
|
|
.TP
|
|
.B WRDSF_INCREMENTAL
|
|
Incremental mode. Each subsequent call to \fBwordsplit\fR with
|
|
\fBNULL\fR as its first argument parses the next word from the input.
|
|
See the section
|
|
.B INCREMENTAL MODE
|
|
for a detailed discussion.
|
|
.TP
|
|
.B WRDSF_PATHEXPAND
|
|
Perform pathname and tilde expansion. If this flag is set, the
|
|
\fIws_options\fR member must also be initialized. See the
|
|
subsection
|
|
.B "Pathname expansion"
|
|
for details.
|
|
.TP
|
|
.B WRDSF_OPTIONS
|
|
The
|
|
.I ws_options
|
|
member is initialized.
|
|
.SH OPTIONS
|
|
The
|
|
.I ws_options
|
|
member is consulted if the
|
|
.B WRDSF_OPTIONS
|
|
flag is set. It contains a bitwise \fBOR\fR of one or more of the
|
|
following options:
|
|
.TP
|
|
.B WRDSO_NULLGLOB
|
|
Remove the words that produce empty string after pathname expansion.
|
|
.TP
|
|
.B WRDSO_FAILGLOB
|
|
Output error message if pathname expansion produces empty string.
|
|
.TP
|
|
.B WRDSO_DOTGLOB
|
|
During pathname expansion allow a leading period to be matched by
|
|
metacharacters.
|
|
.TP
|
|
.B WRDSO_ARGV
|
|
Split command invocation into words and pass the result to the
|
|
\fIws_command\fR function in \fIargv\fR parameter.
|
|
.PP
|
|
.TP
|
|
.B WRDSO_BSKEEP_WORD
|
|
Quote removal: when an unrecognized escape sequence is encountered in a word,
|
|
preserve it on output. If that bit is not set, the backslash is
|
|
removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_WORD
|
|
Quote removal: handle octal escapes in words.
|
|
.TP
|
|
.B WRDSO_XESC_WORD
|
|
Quote removal: handle hex escapes in words.
|
|
.TP
|
|
.B WRDSO_BSKEEP_QUOTE
|
|
Quote removal: when an unrecognized escape sequence is encountered in
|
|
a doubly-quoted string, preserve it on output. If that bit is not
|
|
set, the backslash is removed from such sequences.
|
|
.TP
|
|
.B WRDSO_OESC_QUOTE
|
|
Quote removal: handle octal escapes in doubly-quoted strings.
|
|
.TP
|
|
.B WRDSO_XESC_QUOTE
|
|
Quote removal: handle hex escapes in doubly-quoted strings.
|
|
.SH "ERROR CODES"
|
|
.TP
|
|
.BR WRDSE_OK ", " WRDSE_EOF
|
|
Successful return.
|
|
.TP
|
|
.B WRDSE_QUOTE
|
|
Missing closing quote. The \fIws_endp\fR member points to the position in
|
|
the input string where the error occurred.
|
|
.TP
|
|
.B WRDSE_NOSPACE
|
|
Memory exhausted.
|
|
.TP
|
|
.B WRDSE_USAGE
|
|
Invalid wordsplit usage.
|
|
.TP
|
|
.B WRDSE_CBRACE
|
|
Unbalanced curly brace.
|
|
.TP
|
|
.B WRDSE_UNDEF
|
|
Undefined variable. This error is returned only if the
|
|
\fBWRDSF_UNDEF\fR flag is set.
|
|
.TP
|
|
.B WRDSE_NOINPUT
|
|
Input exhausted. This is not actually an error. This code is returned
|
|
if \fBwordsplit\fR (or \fBwordsplit_len\fR) is invoked in incremental
|
|
mode and encounters end of input string. See the section
|
|
.BR "INCREMENTAL MODE" .
|
|
.TP
|
|
.B WRDSE_PAREN
|
|
Unbalanced parenthesis.
|
|
.TP
|
|
.B WRDSE_GLOBERR
|
|
An error occurred during pattern matching.
|
|
.TP
|
|
.B WRDSE_USERERR
|
|
User-defined error. Normally this error is returned by \fBws_getvar\fR or
|
|
\fBws_command\fR. Use the function
|
|
.B wordsplit_strerror
|
|
to get textual description of the error.
|
|
.SH "RETURN VALUE"
|
|
Both
|
|
.B wordsplit
|
|
and
|
|
.B wordsplit_len
|
|
return \fB0\fR on success, and a non-zero error code on
|
|
error (see the section
|
|
.BR "ERROR CODES" ).
|
|
.PP
|
|
.B wordsplit_strerror
|
|
returns a pointer to the constant string describing the last error
|
|
condition that occurred in
|
|
.IR ws .
|
|
.SH EXAMPLE
|
|
The short program below implements a function that parses the
|
|
input string similarly to the shell. All expansions are performed.
|
|
Default error reporting is used.
|
|
.PP
|
|
.EX
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <wordsplit.h>
|
|
|
|
/* Run command from \fIstr\fR (\fIlen\fR bytes long) and store its
|
|
output in \fIret\fR.
|
|
\fIargv\fR and \fIclosure\fR are not used.
|
|
Return wordsplit error code.
|
|
*/
|
|
static int runcmd(char **ret, const char *str, size_t len,
|
|
char **argv, void *closure)
|
|
{
|
|
FILE *fp;
|
|
char *cmd;
|
|
int c, lastc;
|
|
char *buffer = NULL;
|
|
size_t bufsize = 0;
|
|
size_t buflen = 0;
|
|
|
|
/* Convert to a null-terminated string for \fBpopen\fR(3) */
|
|
cmd = malloc(len + 1);
|
|
if (!cmd)
|
|
return WRDSE_NOSPACE;
|
|
memcpy(cmd, str, len);
|
|
cmd[len] = 0;
|
|
|
|
fp = popen(cmd, "r");
|
|
if (!fp) {
|
|
char buf[128];
|
|
|
|
snprintf(buf, sizeof buf, "can't run %s: %s",
|
|
cmd, strerror(errno));
|
|
*ret = strdup(buf);
|
|
if (!*ret)
|
|
return WRDSE_NOSPACE;
|
|
else
|
|
return WRDSE_USERERR;
|
|
}
|
|
|
|
/* Collect the output, reallocating \fIbuffer\fR as needed. */
|
|
while ((c = fgetc(fp)) != EOF) {
|
|
lastc = c;
|
|
if (c == '\n')
|
|
c = ' ';
|
|
if (buflen == bufsize) {
|
|
char *p;
|
|
|
|
if (bufsize == 0)
|
|
bufsize = 80;
|
|
else
|
|
bufsize *= 2;
|
|
p = realloc(buffer, bufsize);
|
|
if (!p) {
|
|
free(buffer);
|
|
free(cmd);
|
|
return WRDSE_NOSPACE;
|
|
}
|
|
buffer = p;
|
|
}
|
|
buffer[buflen++] = c;
|
|
}
|
|
|
|
/* Trim off the trailing newline */
|
|
if (buffer) {
|
|
if (lastc == '\n')
|
|
--buflen;
|
|
buffer[buflen] = 0;
|
|
}
|
|
|
|
pclose(fp);
|
|
free(cmd);
|
|
|
|
/* Return the composed string. */
|
|
*ret = buffer;
|
|
return WRDSE_OK;
|
|
}
|
|
|
|
extern char **environ;
|
|
|
|
/* Parse \fIs\fR much as shell does. Return array of words on
|
|
success, and NULL on error.
|
|
*/
|
|
char **shell_parse(char *s)
|
|
{
|
|
wordsplit_t ws;
|
|
size_t wc;
|
|
char **wv;
|
|
int rc;
|
|
|
|
/* Initialize \fIws\fR */
|
|
ws.ws_env = (const char **) environ;
|
|
ws.ws_command = runcmd;
|
|
/* Call \fBwordsplit\fR. Let it report errors, if any. */
|
|
rc = wordsplit(s, &ws,
|
|
WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_PATHEXPAND
|
|
| WRDSF_SHOWERR);
|
|
if (rc == WRDSE_OK)
|
|
/* Store away the resulting words on success. */
|
|
wordsplit_getwords(&ws, &wc, &wv);
|
|
else
|
|
wv = NULL;
|
|
wordsplit_free(&ws);
|
|
return wv;
|
|
}
|
|
.EE
|
|
.SH AUTHORS
|
|
Sergey Poznyakoff
|
|
.SH "BUG REPORTS"
|
|
Report bugs to <gray+grecs@gnu.org.ua>.
|
|
.SH COPYRIGHT
|
|
Copyright \(co 2009-2014 Sergey Poznyakoff
|
|
.br
|
|
.na
|
|
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
|
.br
|
|
.ad
|
|
This is free software: you are free to change and redistribute it.
|
|
There is NO WARRANTY, to the extent permitted by law.
|
|
.\" Local variables:
|
|
.\" eval: (add-hook 'write-file-hooks 'time-stamp)
|
|
.\" time-stamp-start: ".TH [A-Z_][A-Z0-9_]* [0-9] \""
|
|
.\" time-stamp-format: "%:B %:d, %:y"
|
|
.\" time-stamp-end: "\""
|
|
.\" time-stamp-line-limit: 20
|
|
.\" end:
|
|
|