Synchronize wordsplit with the Mailutils version.

This commit is contained in:
Sergey Poznyakoff 2012-01-13 23:09:32 +02:00
parent dd69afd04e
commit 55bf2d1d61
2 changed files with 238 additions and 118 deletions

View file

@ -86,9 +86,28 @@ _wsplt_nomem (struct wordsplit *wsp)
return wsp->ws_errno;
}
/* Reset the per-run state kept in WSP.

   When WRDSF_REUSE is set the word vector fields are left alone, and
   wordsplit_free_words is called unless WRDSF_APPEND is set as well.
   Without WRDSF_REUSE the word vector, word count and allocated slot
   count are zeroed.  In both cases the error code and the node list
   head/tail are cleared afterwards. */
static void
wordsplit_init0 (struct wordsplit *wsp)
{
  int reuse = wsp->ws_flags & WRDSF_REUSE;
  int append = wsp->ws_flags & WRDSF_APPEND;

  if (!reuse)
    {
      wsp->ws_wordn = 0;
      wsp->ws_wordc = 0;
      wsp->ws_wordv = NULL;
    }
  else if (!append)
    wordsplit_free_words (wsp);

  wsp->ws_tail = NULL;
  wsp->ws_head = NULL;
  wsp->ws_errno = 0;
}
static int
wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
int flags)
int flags)
{
wsp->ws_flags = flags;
@ -141,24 +160,13 @@ wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
if (!(wsp->ws_flags & WRDSF_COMMENT))
wsp->ws_comment = NULL;
if (wsp->ws_flags & WRDSF_REUSE)
{
if (!(wsp->ws_flags & WRDSF_APPEND))
wordsplit_free_words (wsp);
}
else
{
wsp->ws_wordv = NULL;
wsp->ws_wordc = 0;
wsp->ws_wordn = 0;
}
if (!(wsp->ws_flags & WRDSF_CLOSURE))
wsp->ws_closure = NULL;
wsp->ws_endp = 0;
wsp->ws_errno = 0;
wsp->ws_head = wsp->ws_tail = NULL;
wordsplit_init0 (wsp);
return 0;
}
@ -177,7 +185,7 @@ alloc_space (struct wordsplit *wsp, size_t count)
else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
{
newalloc = offs + wsp->ws_wordc +
count > ALLOC_INCR ? count : ALLOC_INCR;
(count > ALLOC_INCR ? count : ALLOC_INCR);
ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
}
else
@ -195,14 +203,14 @@ alloc_space (struct wordsplit *wsp, size_t count)
/* Node state flags */
#define _WSNF_NULL 0x01 /* null node (a noop) */
#define _WSNF_NULL 0x01 /* null node (a noop) */
#define _WSNF_WORD 0x02 /* node contains word in v.word */
#define _WSNF_QUOTE 0x04 /* text is quoted */
#define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
#define _WSNF_JOIN 0x10 /* node must be joined with the next node */
#define _WSNF_SEXP 0x20 /* is a sed expression */
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
#define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
wordsplit_add_segm must add the
segment even if it is empty */
@ -367,8 +375,7 @@ wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
}
static int
wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end,
int flg)
wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
{
struct wordsplit_node *node;
int rc;
@ -378,7 +385,7 @@ wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end,
rc = wsnode_new (wsp, &node);
if (rc)
return rc;
node->flags = flg & ~(_WSNF_WORD|_WSNF_EMPTYOK);
node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
node->v.segm.beg = beg;
node->v.segm.end = end;
wsnode_append (wsp, node);
@ -413,7 +420,7 @@ wordsplit_dump_nodes (struct wordsplit *wsp)
else
wsp->ws_debug ("%4d: %p: %#04x (%s):%.*s;",
n, p, p->flags, wsnode_flagstr (p->flags),
(int)(p->v.segm.end - p->v.segm.beg),
(int) (p->v.segm.end - p->v.segm.beg),
wsp->ws_input + p->v.segm.beg);
}
}
@ -501,7 +508,12 @@ wsnode_quoteremoval (struct wordsplit *wsp)
p->v.word = newstr;
p->flags |= _WSNF_WORD;
}
uqfn (p->v.word, str, slen);
if (wsp->ws_flags & WRDSF_ESCAPE)
wordsplit_general_unquote_copy (p->v.word, str, slen,
wsp->ws_escape);
else
uqfn (p->v.word, str, slen);
}
}
return 0;
@ -810,8 +822,7 @@ expvar (struct wordsplit *wsp, const char *str, size_t len,
ws.ws_delim = wsp->ws_delim;
if (wordsplit (value, &ws,
WRDSF_NOVAR | WRDSF_NOCMD |
WRDSF_DELIM | WRDSF_SQUEEZE_DELIMS))
WRDSF_NOVAR | WRDSF_NOCMD | WRDSF_DELIM | WRDSF_WS))
{
wordsplit_free (&ws);
return 1;
@ -907,6 +918,24 @@ node_expand_vars (struct wordsplit *wsp, struct wordsplit_node *node)
return 0;
}
/* Walk the node list of WSP and, for every node that has the
   _WSNF_NULL flag set, unlink it with wsnode_remove and release it
   with wsnode_free. */
static void
wsnode_nullelim (struct wordsplit *wsp)
{
  struct wordsplit_node *node = wsp->ws_head;

  while (node)
    {
      /* Fetch the successor first: NODE may be freed below. */
      struct wordsplit_node *following = node->next;

      if (node->flags & _WSNF_NULL)
        {
          wsnode_remove (wsp, node);
          wsnode_free (node);
        }
      node = following;
    }
}
static int
wordsplit_varexp (struct wordsplit *wsp)
{
@ -921,18 +950,7 @@ wordsplit_varexp (struct wordsplit *wsp)
p = next;
}
/* Remove NULL lists */
for (p = wsp->ws_head; p;)
{
struct wordsplit_node *next = p->next;
if (p->flags & _WSNF_NULL)
{
wsnode_remove (wsp, p);
wsnode_free (p);
}
p = next;
}
wsnode_nullelim (wsp);
return 0;
}
@ -957,10 +975,14 @@ wordsplit_trimws (struct wordsplit *wsp)
;
p->v.segm.beg = n;
/* Trim trailing whitespace */
for (n = p->v.segm.end; n > p->v.segm.beg && ISWS (wsp->ws_input[n-1]);
n--);
for (n = p->v.segm.end;
n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
p->v.segm.end = n;
if (p->v.segm.beg == p->v.segm.end)
p->flags |= _WSNF_NULL;
}
wsnode_nullelim (wsp);
}
static int
@ -1002,10 +1024,20 @@ skip_delim (struct wordsplit *wsp)
size_t start = wsp->ws_endp;
if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
{
do
start++;
while (start < wsp->ws_len
&& ISDELIM (wsp, wsp->ws_input[start]));
if ((wsp->ws_flags & WRDSF_RETURN_DELIMS) &&
ISDELIM (wsp, wsp->ws_input[start]))
{
int delim = wsp->ws_input[start];
do
start++;
while (start < wsp->ws_len && delim == wsp->ws_input[start]);
}
else
{
do
start++;
while (start < wsp->ws_len && ISDELIM (wsp, wsp->ws_input[start]));
}
start--;
}
@ -1015,9 +1047,9 @@ skip_delim (struct wordsplit *wsp)
return start;
}
#define _WRDS_EOF 0
#define _WRDS_OK 1
#define _WRDS_ERR 2
#define _WRDS_EOF 0
#define _WRDS_OK 1
#define _WRDS_ERR 2
static int
scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
@ -1032,7 +1064,7 @@ scan_qstring (struct wordsplit *wsp, size_t start, size_t * end)
j++;
if (j < len && command[j] == q)
{
int flags = _WSNF_QUOTE|_WSNF_EMPTYOK;
int flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
if (q == '\'')
flags |= _WSNF_NOEXPAND;
if (wordsplit_add_segm (wsp, start + 1, j, flags))
@ -1100,7 +1132,8 @@ scan_word (struct wordsplit *wsp, size_t start)
continue;
}
if (command[i] == '\'' || command[i] == '"')
if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
{
if (join && wsp->ws_tail)
wsp->ws_tail->flags |= _WSNF_JOIN;
@ -1121,11 +1154,7 @@ scan_word (struct wordsplit *wsp, size_t start)
}
else if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
{
do
{
i++;
}
while (i < len && ISDELIM (wsp, command[i]));
i++;
}
else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
flags |= _WSNF_EMPTYOK;
@ -1135,7 +1164,8 @@ scan_word (struct wordsplit *wsp, size_t start)
if (wordsplit_add_segm (wsp, start, i, flags))
return _WRDS_ERR;
wsp->ws_endp = i;
if (wsp->ws_flags & WRDSF_INCREMENTAL)
return _WRDS_EOF;
return _WRDS_OK;
}
@ -1217,6 +1247,21 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
return len;
}
/* Copy the first N bytes of SRC to DST, removing escaping backslashes.

   A backslash is dropped when the byte that follows it lies within the
   first N bytes of SRC and is listed in ESCAPABLE; the following byte
   is then copied verbatim.  Any other backslash -- including one that
   is the very last of the N bytes -- is copied unchanged, so bytes at
   or beyond SRC[N] are never examined.

   DST is always NUL-terminated.  The output is never longer than the
   input, so DST needs room for at most N + 1 bytes. */
void
wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
                                const char *escapable)
{
  size_t i;

  for (i = 0; i < n;)
    {
      /* Only look at src[i + 1] if it is still inside the range. */
      if (src[i] == '\\' && i + 1 < n && strchr (escapable, src[i + 1]))
        i++;
      *dst++ = src[i++];
    }
  *dst = 0;
}
void
wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n)
{
@ -1335,33 +1380,26 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
}
}
int
wordsplit_len (const char *command, size_t len, struct wordsplit *wsp,
int flags)
static int
wordsplit_process_list (struct wordsplit *wsp, size_t start)
{
int rc;
size_t start = 0;
rc = wordsplit_init (wsp, command, len, flags);
if (rc)
return rc;
if (wsp->ws_flags & WRDSF_SHOWDBG)
wsp->ws_debug ("Input:%.*s;", (int)len, command);
if (wsp->ws_flags & WRDSF_NOSPLIT)
{
/* Treat entire input as a quoted argument */
if (wordsplit_add_segm (wsp, 0, len, _WSNF_QUOTE))
if (wordsplit_add_segm (wsp, start, wsp->ws_len, _WSNF_QUOTE))
return wsp->ws_errno;
}
else
{
int rc;
while ((rc = scan_word (wsp, start)) == _WRDS_OK)
start = skip_delim (wsp);
/* Make sure tail element is not joinable */
if (wsp->ws_tail)
wsp->ws_tail->flags &= ~_WSNF_JOIN;
if (rc == _WRDS_ERR)
return wsp->ws_errno;
}
if (wsp->ws_flags & WRDSF_SHOWDBG)
@ -1369,11 +1407,6 @@ wordsplit_len (const char *command, size_t len, struct wordsplit *wsp,
wsp->ws_debug ("Initial list:");
wordsplit_dump_nodes (wsp);
}
if (rc)
{
wordsplit_free_nodes (wsp);
return wsp->ws_errno;
}
if (wsp->ws_flags & WRDSF_WS)
{
@ -1419,10 +1452,75 @@ wordsplit_len (const char *command, size_t len, struct wordsplit *wsp,
wsp->ws_debug ("Coalesced list:");
wordsplit_dump_nodes (wsp);
}
wordsplit_finish (wsp);
}
while (0);
return wsp->ws_errno;
}
/* Split the first LENGTH bytes of COMMAND into words according to FLAGS,
   using WSP for both the working state and the result.

   If COMMAND is NULL the call continues with the input already recorded
   in WSP (ws_input, ws_len, ws_endp).  This is accepted only when FLAGS
   contains WRDSF_INCREMENTAL; otherwise EINVAL is returned.  If the
   recorded input has been consumed entirely, ws_errno is set to
   WRDSE_NOINPUT and returned.

   Returns the non-zero code from wordsplit_init or
   wordsplit_process_list on failure, otherwise the value of
   wsp->ws_errno after wordsplit_finish has run. */
int
wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
int flags)
{
int rc;
size_t start;
/* cmdptr/cmdlen describe the text shown in debugging output and, in the
   non-NULL case, the input handed to wordsplit_init. */
const char *cmdptr;
size_t cmdlen;
if (!command)
{
/* Continuation call: only meaningful in incremental mode. */
if (!(flags & WRDSF_INCREMENTAL))
return EINVAL;
/* Step over the delimiter left behind by the previous call. */
start = skip_delim (wsp);
if (wsp->ws_endp == wsp->ws_len)
{
/* Nothing left to scan. */
wsp->ws_errno = WRDSE_NOINPUT;
if (wsp->ws_flags & WRDSF_SHOWERR)
wordsplit_perror (wsp);
return wsp->ws_errno;
}
cmdptr = wsp->ws_input + wsp->ws_endp;
cmdlen = wsp->ws_len - wsp->ws_endp;
/* Force WRDSF_REUSE so that wordsplit_init0 does not simply zero the
   word vector fields. */
wsp->ws_flags |= WRDSF_REUSE;
wordsplit_init0 (wsp);
}
else
{
/* Fresh input: full initialization, scanning starts at offset 0. */
cmdptr = command;
cmdlen = length;
start = 0;
rc = wordsplit_init (wsp, cmdptr, cmdlen, flags);
if (rc)
return rc;
}
if (wsp->ws_flags & WRDSF_SHOWDBG)
wsp->ws_debug ("Input:%.*s;", (int) cmdlen, cmdptr);
rc = wordsplit_process_list (wsp, start);
if (rc == 0 && (flags & WRDSF_INCREMENTAL))
{
/* In incremental mode, keep restarting while the pass left the node
   list empty and unscanned input remains. */
while (!wsp->ws_head && wsp->ws_endp < wsp->ws_len)
{
start = skip_delim (wsp);
if (wsp->ws_flags & WRDSF_SHOWDBG)
{
cmdptr = wsp->ws_input + wsp->ws_endp;
cmdlen = wsp->ws_len - wsp->ws_endp;
wsp->ws_debug ("Restart:%.*s;", (int) cmdlen, cmdptr);
}
rc = wordsplit_process_list (wsp, start);
if (rc)
break;
}
}
if (rc)
{
/* Failure: discard the node list and report the error code. */
wordsplit_free_nodes (wsp);
return rc;
}
/* Success: run wordsplit_finish on the node list, then release the
   nodes. */
wordsplit_finish (wsp);
wordsplit_free_nodes (wsp);
return wsp->ws_errno;
}
@ -1430,7 +1528,8 @@ wordsplit_len (const char *command, size_t len, struct wordsplit *wsp,
/* Split the NUL-terminated string COMMAND into words, storing the
   result in WS.  This is a thin wrapper around wordsplit_len that
   computes the input length with strlen.

   COMMAND may be NULL; strlen is not called in that case, a length of 0
   is passed on, and wordsplit_len decides how to treat the NULL input.
   Returns whatever wordsplit_len returns. */
int
wordsplit (const char *command, struct wordsplit *ws, int flags)
{
  size_t len = command ? strlen (command) : 0;

  return wordsplit_len (command, len, ws, flags);
}
void
@ -1492,6 +1591,10 @@ wordsplit_perror (struct wordsplit *wsp)
wsp->ws_error (_("undefined variable"));
break;
case WRDSE_NOINPUT:
wsp->ws_error (_("input exhausted"));
break;
default:
wsp->ws_error (_("unknown error"));
}
@ -1501,10 +1604,11 @@ const char *_wordsplit_errstr[] = {
N_("no error"),
N_("missing closing quote"),
N_("memory exhausted"),
N_("variable expansion and command substitution " "are not yet supported"),
N_("command substitution is not yet supported"),
N_("invalid wordsplit usage"),
N_("unbalanced curly brace"),
N_("undefined variable")
N_("undefined variable"),
N_("input exhausted")
};
int _wordsplit_nerrs =
sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);

View file

@ -28,11 +28,12 @@ struct wordsplit
int ws_flags;
const char *ws_delim;
const char *ws_comment;
void (*ws_alloc_die) (struct wordsplit *wsp);
const char *ws_escape;
void (*ws_alloc_die) (struct wordsplit * wsp);
void (*ws_error) (const char *, ...)
__attribute__ ((__format__ (__printf__, 1, 2)));
__attribute__ ((__format__ (__printf__, 1, 2)));
void (*ws_debug) (const char *, ...)
__attribute__ ((__format__ (__printf__, 1, 2)));
__attribute__ ((__format__ (__printf__, 1, 2)));
const char **ws_env;
const char *(*ws_getvar) (const char *, size_t, void *);
@ -45,70 +46,82 @@ struct wordsplit
struct wordsplit_node *ws_head, *ws_tail;
};
/* Append the words found to the array resulting from a previous
/* Wordsplit flags. Only 2 bits of a 32-bit word remain unused.
It is getting crowded... */
/* Append the words found to the array resulting from a previous
call. */
#define WRDSF_APPEND 0x0000001
#define WRDSF_APPEND 0x00000001
/* Insert we_offs initial NULLs in the array ws_wordv.
(These are not counted in the returned ws_wordc.) */
#define WRDSF_DOOFFS 0x0000002
#define WRDSF_DOOFFS 0x00000002
/* Don't do command substitution. Reserved for future use. */
#define WRDSF_NOCMD 0x0000004
#define WRDSF_NOCMD 0x00000004
/* The parameter p resulted from a previous call to
wordsplit(), and wordsplit_free() was not called. Reuse the
allocated storage. */
#define WRDSF_REUSE 0x0000008
#define WRDSF_REUSE 0x00000008
/* Print errors */
#define WRDSF_SHOWERR 0x0000010
#define WRDSF_SHOWERR 0x00000010
/* Consider it an error if an undefined shell variable
is expanded. */
#define WRDSF_UNDEF 0x0000020
#define WRDSF_UNDEF 0x00000020
/* Don't do variable expansion. */
#define WRDSF_NOVAR 0x0000040
#define WRDSF_NOVAR 0x00000040
/* Abort on ENOMEM error */
#define WRDSF_ENOMEMABRT 0x0000080
#define WRDSF_ENOMEMABRT 0x00000080
/* Trim off any leading and trailing whitespace */
#define WRDSF_WS 0x0000100
#define WRDSF_WS 0x00000100
/* Handle single quotes */
#define WRDSF_SQUOTE 0x00000200
/* Handle double quotes */
#define WRDSF_DQUOTE 0x00000400
/* Handle quotes and escape directives */
#define WRDSF_QUOTE 0x0000200
#define WRDSF_QUOTE (WRDSF_SQUOTE|WRDSF_DQUOTE)
/* Replace each input sequence of repeated delimiters with a single
delimiter */
#define WRDSF_SQUEEZE_DELIMS 0x0000400
#define WRDSF_SQUEEZE_DELIMS 0x00000800
/* Return delimiters */
#define WRDSF_RETURN_DELIMS 0x0000800
#define WRDSF_RETURN_DELIMS 0x00001000
/* Treat sed expressions as words */
#define WRDSF_SED_EXPR 0x0001000
#define WRDSF_SED_EXPR 0x00002000
/* ws_delim field is initialized */
#define WRDSF_DELIM 0x0002000
#define WRDSF_DELIM 0x00004000
/* ws_comment field is initialized */
#define WRDSF_COMMENT 0x0004000
#define WRDSF_COMMENT 0x00008000
/* ws_alloc_die field is initialized */
#define WRDSF_ALLOC_DIE 0x0008000
#define WRDSF_ALLOC_DIE 0x00010000
/* ws_error field is initialized */
#define WRDSF_ERROR 0x0010000
#define WRDSF_ERROR 0x00020000
/* ws_debug field is initialized */
#define WRDSF_DEBUG 0x0020000
#define WRDSF_DEBUG 0x00040000
/* ws_env field is initialized */
#define WRDSF_ENV 0x0040000
#define WRDSF_ENV 0x00080000
/* ws_getvar field is initialized */
#define WRDSF_GETVAR 0x0080000
#define WRDSF_GETVAR 0x00100000
/* enable debugging */
#define WRDSF_SHOWDBG 0x0100000
#define WRDSF_SHOWDBG 0x00200000
/* Don't split input into words. Useful for side effects. */
#define WRDSF_NOSPLIT 0x0200000
#define WRDSF_NOSPLIT 0x00400000
/* Keep undefined variables in place, instead of expanding them to
empty string */
#define WRDSF_KEEPUNDEF 0x0400000
#define WRDSF_KEEPUNDEF 0x00800000
/* Warn about undefined variables */
#define WRDSF_WARNUNDEF 0x0800000
#define WRDSF_WARNUNDEF 0x01000000
/* Handle C escapes */
#define WRDSF_CESCAPES 0x1000000
#define WRDSF_CESCAPES 0x02000000
/* ws_closure is set */
#define WRDSF_CLOSURE 0x2000000
#define WRDSF_CLOSURE 0x04000000
/* ws_env is a Key/Value environment, i.e. the value of a variable is
stored in the element that follows its name. */
#define WRDSF_ENV_KV 0x4000000
#define WRDSF_ENV_KV 0x08000000
/* ws_escape is set */
#define WRDSF_ESCAPE 0x10000000
/* Incremental mode */
#define WRDSF_INCREMENTAL 0x20000000
#define WRDSF_DEFFLAGS \
(WRDSF_NOVAR | WRDSF_NOCMD | \
@ -121,6 +134,7 @@ struct wordsplit
#define WRDSE_USAGE 4
#define WRDSE_CBRACE 5
#define WRDSE_UNDEF 6
#define WRDSE_NOINPUT 7
int wordsplit (const char *s, struct wordsplit *p, int flags);
int wordsplit_len (const char *s, size_t len,
@ -132,6 +146,8 @@ int wordsplit_c_unquote_char (int c);
int wordsplit_c_quote_char (int c);
size_t wordsplit_c_quoted_length (const char *str, int quote_hex,
int *quote);
void wordsplit_general_unquote_copy (char *dst, const char *src, size_t n,
const char *escapable);
void wordsplit_sh_unquote_copy (char *dst, const char *src, size_t n);
void wordsplit_c_unquote_copy (char *dst, const char *src, size_t n);
void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);