wordsplit/tests/wsp.c
Sergey Poznyakoff c01a4a61e8 Optionally disable splitting of unexpandable variable and command refs
* include/wordsplit.h (WRDSO_NOVARSPLIT)
(WRDSO_NOCMDSPLIT): New options.
* src/wordsplit.c (scan_word): Treat any variable reference, even
containing whitespace, as a single word if WRDSO_NOVARSPLIT is set.
Ditto for commands and WRDSO_NOCMDSPLIT.
* tests/wordsplit.at: Add new tests.
* tests/wsp.c: Recognize novarsplit and nocmdsplit
options.
For future use: recognize bskeep_words, bskeep_quote, bskeep.
2019-05-13 15:20:24 +03:00

700 lines
14 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* grecs - Gray's Extensible Configuration System
Copyright (C) 2014-2016 Sergey Poznyakoff
Grecs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
Grecs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with Grecs. If not, see <http://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include "grecs.h"
#include "wordsplit.h"
/* Process environment: a NULL-terminated array of strings, walked by
   make_env_kv and handed to wordsplit by main.  */
extern char **environ;
/* Name the program was invoked with (argv[0]); main sets it and it is
   used as the prefix of the usage text and of every diagnostic.  */
char *progname;
/* One entry of a keyword table: associates a keyword accepted on the
   command line with an integer token.  Tables are searched by kwxlat
   and end with an entry whose name is NULL.  */
struct kwd
{
const char *name; /* keyword text; NULL marks the end of a table */
int tok; /* value kwxlat stores when the keyword matches */
};
/* Keywords that translate directly into a WRDSF_ flag value.  main ORs
   the value into wsflags for KEYWORD and clears it for -KEYWORD; help
   lists every entry as "[-]KEYWORD".  */
struct kwd bool_keytab[] = {
{ "append", WRDSF_APPEND },
/*{ "reuse", WRDSF_REUSE },*/
{ "undef", WRDSF_UNDEF },
{ "novar", WRDSF_NOVAR },
{ "nocmd", WRDSF_NOCMD },
{ "ws", WRDSF_WS },
{ "quote", WRDSF_QUOTE },
{ "squote", WRDSF_SQUOTE },
{ "dquote", WRDSF_DQUOTE },
{ "squeeze_delims", WRDSF_SQUEEZE_DELIMS },
{ "return_delims", WRDSF_RETURN_DELIMS },
{ "sed", WRDSF_SED_EXPR },
{ "debug", WRDSF_SHOWDBG },
{ "nosplit", WRDSF_NOSPLIT },
{ "keepundef", WRDSF_KEEPUNDEF },
{ "warnundef", WRDSF_WARNUNDEF },
{ "cescapes", WRDSF_CESCAPES },
{ "default", WRDSF_DEFFLAGS },
{ "env_kv", WRDSF_ENV_KV },
{ "incremental", WRDSF_INCREMENTAL },
{ "pathexpand", WRDSF_PATHEXPAND },
{ NULL, 0 } /* end of table */
};
/* Keywords that translate into WRDSO_ option bits.  When main sees one
   it turns on WRDSF_OPTIONS in wsflags and sets (KEYWORD) or clears
   (-KEYWORD) the bits in ws.ws_options.  */
struct kwd opt_keytab[] = {
{ "nullglob", WRDSO_NULLGLOB },
{ "failglob", WRDSO_FAILGLOB },
{ "dotglob", WRDSO_DOTGLOB },
{ "bskeep_words", WRDSO_BSKEEP_WORD },
{ "bskeep_quote", WRDSO_BSKEEP_QUOTE },
/* Shorthand that selects both bskeep bits at once.  */
{ "bskeep", WRDSO_BSKEEP_WORD|WRDSO_BSKEEP_QUOTE },
{ "novarsplit", WRDSO_NOVARSPLIT },
{ "nocmdsplit", WRDSO_NOCMDSPLIT },
{ NULL, 0 } /* end of table */
};
/* Keywords whose positive form consumes the following command line
   argument as a string value: main stores it in ws.ws_delim,
   ws.ws_comment or (via set_escape_string) ws.ws_escape and sets the
   flag.  The negated form (-KEYWORD) only clears the flag.  */
struct kwd string_keytab[] = {
{ "delim", WRDSF_DELIM },
{ "comment", WRDSF_COMMENT },
{ "escape", WRDSF_ESCAPE },
{ NULL, 0 } /* end of table */
};
/* Look up STR in the keyword table KWP, which must end with an entry
   whose name is NULL.

   On an exact (case-sensitive) match store the entry's token in *RES
   and return 0.  Return -1, leaving *RES untouched, if STR is not in
   the table.  */
static int
kwxlat (struct kwd *kwp, const char *str, int *res)
{
  struct kwd *ent = kwp;

  while (ent->name != NULL)
    {
      if (strcmp (ent->name, str) == 0)
        {
          *res = ent->tok;
          return 0;
        }
      ent++;
    }
  return -1;
}
static void
help ()
{
size_t i;
printf ("usage: %s [options] [VAR=VALUE...] [-- EXTRA...]\n", progname);
printf ("options are:\n");
printf (" [-]trimnl\n");
printf (" [-]plaintext\n");
printf (" -env\n");
printf (" env sys|none|null\n");
putchar ('\n');
for (i = 0; bool_keytab[i].name; i++)
printf (" [-]%s\n", bool_keytab[i].name);
putchar ('\n');
for (i = 0; string_keytab[i].name; i++)
{
printf (" -%s\n", string_keytab[i].name);
printf (" %s ARG\n", string_keytab[i].name);
}
printf (" escape-word ARG\n");
printf (" escape-quote ARG\n");
putchar ('\n');
for (i = 0; opt_keytab[i].name; i++)
{
printf (" [-]%s\n", opt_keytab[i].name);
}
putchar ('\n');
printf (" -dooffs\n");
printf (" dooffs COUNT ARGS...\n");
exit (0);
}
/* Write WORD to standard output.

   If PLAINTEXT is non-zero the word is printed verbatim.  Otherwise it
   is passed through wordsplit_c_quote_copy and the result is printed,
   enclosed in double quotes when wordsplit_c_quoted_length reports
   that quoting is needed or when WORD is the empty string.

   The conversion buffer is static: it is grown on demand and reused by
   later calls, never freed.  Allocation failure trips an assertion.  */
void
print_qword (const char *word, int plaintext)
{
  static char *qbuf = NULL;
  static size_t qlen = 0;
  int need_quotes;
  size_t qsize = wordsplit_c_quoted_length (word, 0, &need_quotes);

  if (plaintext)
    {
      printf ("%s", word);
      return;
    }

  /* An empty word would be invisible without quotes.  */
  if (word[0] == '\0')
    need_quotes = 1;

  /* Make room for QSIZE characters plus the terminating NUL.  */
  if (qlen <= qsize)
    {
      qlen = qsize + 1;
      qbuf = realloc (qbuf, qlen);
      assert (qbuf != NULL);
    }

  wordsplit_c_quote_copy (qbuf, word, 0);
  qbuf[qsize] = '\0';

  if (!need_quotes)
    printf ("%s", qbuf);
  else
    printf ("\"%s\"", qbuf);
}
/* Return a newly allocated, NUL-terminated copy of the first N bytes
   of S.  Allocation failure trips an assertion.  */
static char *
env_kv_dup (const char *s, size_t n)
{
  char *copy = malloc (n + 1);

  assert (copy != NULL);
  memcpy (copy, s, n);
  copy[n] = 0;
  return copy;
}

/* Convert environment to K/V form.

   Returns a newly allocated, NULL-terminated array in which every
   "NAME=VALUE" entry of environ is replaced by two consecutive
   strings: NAME followed by VALUE.  Only the first '=' separates the
   two, so VALUE may itself contain '='.  An entry without any '=' is
   stored as NAME followed by an empty string (previously the value was
   read from beyond the end of the entry).

   All strings and the array itself are heap allocated and owned by
   the caller.  Allocation failure trips an assertion.  */
static char **
make_env_kv ()
{
  /* Declared locally as well, so that the function does not depend on
     a file-scope declaration being visible.  */
  extern char **environ;
  size_t count, i, j;
  char **newenv;

  /* Count the number of entries */
  for (count = 0; environ[count]; count++)
    ;

  /* Two slots per entry plus the terminating NULL.  */
  newenv = calloc (count * 2 + 1, sizeof (newenv[0]));
  assert (newenv != NULL);

  for (i = j = 0; i < count; i++)
    {
      const char *entry = environ[i];
      size_t klen = strcspn (entry, "=");
      const char *value = entry + klen;

      /* Skip the separator only if there is one; otherwise VALUE
         stays on the terminating NUL and yields an empty string.  */
      if (*value == '=')
        value++;

      newenv[j++] = env_kv_dup (entry, klen);
      newenv[j++] = env_kv_dup (value, strlen (value));
    }
  newenv[j] = NULL;
  return newenv;
}
/* Variable lookup callback installed by main as ws.ws_getvar.

   DATA is a NULL-terminated array of "NAME=VALUE" strings.  The name
   to look up is the VLEN bytes starting at VPTR (it is not
   NUL-terminated).  On a match, store a freshly allocated copy of the
   value in *RET and return WRDSE_OK; return WRDSE_NOSPACE if the copy
   cannot be allocated and WRDSE_UNDEF if no entry has that name.  */
static int
wsp_getvar (char **ret, const char *vptr, size_t vlen, void *data)
{
  char **entry;

  for (entry = data; *entry != NULL; entry++)
    {
      char *value;

      /* The name must match in full, not merely as a prefix.  */
      if (strcspn (*entry, "=") != vlen
          || memcmp (*entry, vptr, vlen) != 0)
        continue;

      value = strdup (*entry + vlen + 1);
      if (value == NULL)
        return WRDSE_NOSPACE;
      *ret = value;
      return WRDSE_OK;
    }
  return WRDSE_UNDEF;
}
/* Handler for the "quote" command.

   STR/LEN is the full command text.  The leading word (up to the first
   space or tab) and the run of spaces and tabs that follows it are
   skipped; the remainder is returned verbatim in *RET as a newly
   allocated, NUL-terminated string.  ARGV is not used.

   Returns WRDSE_OK on success and WRDSE_NOSPACE if memory cannot be
   allocated.  */
static int
cmd_quote (char **ret, const char *str, size_t len, char **argv)
{
  /* size_t rather than int: the index is compared against and
     subtracted from LEN, which is a size_t.  */
  size_t skip = 0;

  /* Skip the command name ...  */
  while (skip < len && !(str[skip] == ' ' || str[skip] == '\t'))
    skip++;
  /* ... and the blanks separating it from its argument.  */
  while (skip < len && (str[skip] == ' ' || str[skip] == '\t'))
    skip++;

  len -= skip;
  *ret = malloc (len + 1);
  if (!*ret)
    return WRDSE_NOSPACE;
  memcpy (*ret, str + skip, len);
  (*ret)[len] = 0;
  return WRDSE_OK;
}
/* Join ARGV[1], ARGV[2], ... (up to the terminating NULL) into a single
   newly allocated string, placing the character SEP between adjacent
   elements, and store it in *RET.  With no arguments the result is an
   empty string.

   The buffer is sized from the arguments themselves, not from the
   length of the raw command text: the argument words are not
   guaranteed to fit in a buffer of that length.

   Returns WRDSE_OK on success and WRDSE_NOSPACE if memory cannot be
   allocated.  */
static int
cmd_join_args (char **ret, char **argv, int sep)
{
  size_t total = 0;
  size_t i;
  char *p;

  /* Each argument needs its own bytes plus one for the separator that
     follows it (or, for the last one, the terminating NUL).  */
  for (i = 1; argv[i]; i++)
    total += strlen (argv[i]) + 1;

  p = malloc (total + 1);
  if (!p)
    return WRDSE_NOSPACE;
  *ret = p;

  for (i = 1; argv[i]; i++)
    {
      size_t s = strlen (argv[i]);

      if (i > 1)
        *p++ = sep;
      memcpy (p, argv[i], s);
      p += s;
    }
  *p = 0;
  return WRDSE_OK;
}

/* Handler for the "words" command: return the command arguments
   (ARGV[1] onwards) joined with single spaces.  STR and LEN are not
   used.  Returns WRDSE_OK or WRDSE_NOSPACE.  */
static int
cmd_words (char **ret, const char *str, size_t len, char **argv)
{
  return cmd_join_args (ret, argv, ' ');
}

/* Handler for the "lines" command: return the command arguments
   (ARGV[1] onwards) joined with newlines.  STR and LEN are not used.
   Returns WRDSE_OK or WRDSE_NOSPACE.  */
static int
cmd_lines (char **ret, const char *str, size_t len, char **argv)
{
  return cmd_join_args (ret, argv, '\n');
}
/* Table of the commands understood by wsp_runcmd.  A handler is called
   with the same RET, STR, LEN and ARGV arguments wsp_runcmd received
   and its return value becomes the return value of wsp_runcmd.  */
static struct command
{
char const *name; /* command name; compared with argv[0] */
int (*cmd)(char **ret, const char *str, size_t len, char **argv); /* handler */
} comtab[] = {
{ "quote", cmd_quote },
{ "words", cmd_words },
{ "lines", cmd_lines }
};
/* Command callback installed by main as ws.ws_command.

   Dispatches on ARGV[0]: if it names an entry of comtab, the entry's
   handler is called with RET, STR, LEN and ARGV and its result is
   returned.  Otherwise *RET is set to a message of the form
   "unknown command: NAME" and WRDSE_USERERR is returned, or
   WRDSE_NOSPACE if the message could not be formatted.  CLOSURE is
   not used.  */
static int
wsp_runcmd (char **ret, const char *str, size_t len, char **argv, void *closure)
{
  const size_t ncmd = sizeof (comtab) / sizeof (comtab[0]);
  size_t bufsize = 0;
  size_t n;

  for (n = 0; n < ncmd; n++)
    if (strcmp (comtab[n].name, argv[0]) == 0)
      return comtab[n].cmd (ret, str, len, argv);

  /* No such command: hand a diagnostic back to the caller.  */
  *ret = NULL;
  if (grecs_asprintf (ret, &bufsize, "unknown command: %s", argv[0]))
    return WRDSE_NOSPACE;
  return WRDSE_USERERR;
}
/* How main sets up the environment passed to wordsplit (selected with
   the "env" keyword; the default is env_sys).  */
enum env_type
{
env_none, /* WRDSF_ENV is not added and ws.ws_env is left alone */
env_null, /* WRDSF_ENV is set with ws.ws_env == NULL */
env_sys /* WRDSF_ENV is set with the process environment (K/V form if WRDSF_ENV_KV) */
};
/* Values accepted as the argument of the "env" keyword, mapped to the
   corresponding enum env_type constant.  */
struct kwd env_keytab[] = {
{ "none", env_none },
{ "null", env_null },
{ "sys", env_sys },
{ NULL } /* end of table */
};
/* Install STR as escape table number Q (an index into ws->ws_escape)
   of WS.

   STR may begin with a flag prefix of the form ":FLAGS:", where each
   character of FLAGS is one of
     +   WRDSO_BSKEEP
     0   WRDSO_OESC
     x   WRDSO_XESC
   Each flag is applied with WRDSO_ESC_SET (ws, q, flag); when a prefix
   is present WRDSF_OPTIONS is also added to *WSFLAGS and the table
   proper starts after the closing colon.  Any other character in the
   prefix, including the end of the string before the closing colon,
   is reported on stderr and the program aborts.

   The pointer stored in ws_escape refers into STR; nothing is
   copied.  */
static void
set_escape_string (wordsplit_t *ws, int *wsflags, int q, const char *str)
{
  if (*str == ':')
    {
      for (str++; *str != ':'; str++)
        {
          int bit;

          if (*str == '+')
            bit = WRDSO_BSKEEP;
          else if (*str == '0')
            bit = WRDSO_OESC;
          else if (*str == 'x')
            bit = WRDSO_XESC;
          else
            {
              fprintf (stderr, "%s: invalid escape flag near %s\n",
                       progname, str);
              abort ();
            }
          WRDSO_ESC_SET (ws, q, bit);
        }
      *wsflags |= WRDSF_OPTIONS;
      /* Step over the closing colon.  */
      str++;
    }
  ws->ws_escape[q] = str;
}
/* Test driver for wordsplit.

   The command line is a sequence of keywords (see help) that are
   translated into wordsplit flags, options and structure fields,
   optionally mixed with VAR=VALUE definitions and followed by
   "-- EXTRA..." words.  After parsing it, every line read from
   standard input is passed to wordsplit and the resulting words are
   printed, one per line, preceded by an "NF:" line with the word count
   and followed by a "TOTAL:" line with ws.ws_wordi.

   Exits with status 1 on a command line error; returns 0 at end of
   input.  */
int
main (int argc, char **argv)
{
char buf[1024], *ptr, *saved_ptr;
int i, offarg = 0; /* offarg: index in argv of the first dooffs word, 0 if none pending */
int trimnl_option = 0;
int plaintext_option = 0;
/* Start from the default flags with variable expansion enabled.  */
int wsflags = (WRDSF_DEFFLAGS & ~WRDSF_NOVAR) |
WRDSF_ENOMEMABRT |
WRDSF_SHOWERR;
wordsplit_t ws;
int next_call = 0;
/* VAR=VALUE definitions from the command line; they point into argv.  */
char *fenvbase[128];
size_t fenvidx = 0;
size_t fenvmax = sizeof (fenvbase) / sizeof (fenvbase[0]);
int use_env = env_sys;
/* Words following "--"; appended to the result of every split.  */
int appendc = 0;
char **appendv = NULL;
progname = argv[0];
ws.ws_options = 0;
for (i = 1; i < argc; i++)
{
char *opt = argv[i];
int negate;
int flag;
/* A leading '-' negates the keyword, a leading '+' is simply dropped.
   A lone "--" ends keyword processing.  */
if (opt[0] == '-')
{
if (opt[1] == '-' && opt[2] == 0)
{
appendc = argc - i - 1;
appendv = argv + i + 1;
break;
}
negate = 1;
opt++;
}
else if (opt[0] == '+')
{
negate = 0;
opt++;
}
else
negate = 0;
/* The leading '-' has already been stripped, so "-help" here stands
   for "--help" on the command line.  help does not return.  */
if (strcmp (opt, "h") == 0 ||
strcmp (opt, "help") == 0 ||
strcmp (opt, "-help") == 0)
{
help ();
}
if (strcmp (opt, "trimnl") == 0)
{
trimnl_option = !negate;
continue;
}
if (strcmp (opt, "plaintext") == 0)
{
plaintext_option = !negate;
continue;
}
/* "-env" disables the environment; "env TYPE" selects one of the
   types listed in env_keytab.  */
if (strcmp (opt, "env") == 0)
{
if (negate)
use_env = env_none;
else
{
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing argument for env\n",
progname);
exit (1);
}
if (kwxlat (env_keytab, argv[i], &use_env))
{
fprintf (stderr, "%s: invalid argument for env\n",
progname);
exit (1);
}
}
continue;
}
/* Plain on/off flags.  */
if (kwxlat (bool_keytab, opt, &flag) == 0)
{
if (negate)
wsflags &= ~flag;
else
wsflags |= flag;
continue;
}
/* Flags whose positive form takes the next argument as its value.  */
if (kwxlat (string_keytab, opt, &flag) == 0)
{
if (negate)
wsflags &= ~flag;
else
{
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing argument for %s\n",
progname, opt);
exit (1);
}
switch (flag)
{
case WRDSF_DELIM:
ws.ws_delim = argv[i];
break;
case WRDSF_COMMENT:
ws.ws_comment = argv[i];
break;
case WRDSF_ESCAPE:
/* "escape" sets both escape tables to the same string.  */
set_escape_string (&ws, &wsflags, 0, argv[i]);
set_escape_string (&ws, &wsflags, 1, argv[i]);
break;
}
wsflags |= flag;
}
continue;
}
/* escape-word / escape-quote set a single escape table.  */
if (strcmp (opt, "escape-word") == 0
|| strcmp (opt, "escape-quote") == 0)
{
/* opt[7] is the first character after "escape-": 'w' selects table 0,
   'q' selects table 1.  */
int q = opt[7] == 'q';
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing argument for %s\n",
progname, opt);
exit (1);
}
/* First escape setting seen: clear the other table so that it does not
   hold an uninitialized pointer.  */
if (!(wsflags & WRDSF_ESCAPE))
{
wsflags |= WRDSF_ESCAPE;
ws.ws_escape[!q] = NULL;
}
set_escape_string (&ws, &wsflags, q, argv[i]);
continue;
}
/* "dooffs COUNT ARGS..." reserves COUNT leading slots in the word
   vector; the COUNT arguments that follow are skipped here and copied
   into those slots after the first successful split.  */
if (strcmp (opt, "dooffs") == 0)
{
if (negate)
wsflags &= ~WRDSF_DOOFFS;
else
{
char *p;
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing arguments for %s\n",
progname, opt);
exit (1);
}
ws.ws_offs = strtoul (argv[i], &p, 10);
if (*p)
{
fprintf (stderr, "%s: invalid number: %s\n",
progname, argv[i]);
exit (1);
}
i++;
if (i + ws.ws_offs > argc)
{
fprintf (stderr, "%s: not enough arguments for %s\n",
progname, opt);
exit (1);
}
offarg = i;
/* Leave i on the last of the COUNT words; the loop increment moves
   past it.  */
i += ws.ws_offs - 1;
wsflags |= WRDSF_DOOFFS;
}
continue;
}
/* WRDSO_ option bits.  */
if (kwxlat (opt_keytab, opt, &flag) == 0)
{
wsflags |= WRDSF_OPTIONS;
if (negate)
ws.ws_options &= ~flag;
else
ws.ws_options |= flag;
continue;
}
/* "maxwords N" sets WRDSO_MAXWORDS and stores N in ws.ws_maxwords.  */
if (strcmp (opt, "maxwords") == 0)
{
char *p;
wsflags |= WRDSF_OPTIONS;
ws.ws_options |= WRDSO_MAXWORDS;
i++;
if (i == argc)
{
fprintf (stderr, "%s: missing arguments for %s\n",
progname, opt);
exit (1);
}
ws.ws_maxwords = strtoul (argv[i], &p, 10);
if (*p)
{
fprintf (stderr, "%s: invalid number: %s\n",
progname, argv[i]);
exit (1);
}
continue;
}
/* Anything else containing '=' is a VAR=VALUE definition.  One slot of
   fenvbase is kept free for the terminating NULL.  */
if (strchr (opt, '='))
{
assert (fenvidx < fenvmax - 1);
fenvbase[fenvidx++] = opt;
continue;
}
fprintf (stderr, "%s: unrecognized argument: %s\n",
progname, opt);
exit (1);
}
/* Variables defined on the command line are served by wsp_getvar,
   which receives fenvbase as its closure data.  */
if (fenvidx)
{
fenvbase[fenvidx] = NULL;
wsflags |= WRDSF_GETVAR | WRDSF_CLOSURE;
ws.ws_getvar = wsp_getvar;
ws.ws_closure = fenvbase;
}
switch (use_env)
{
case env_null:
wsflags |= WRDSF_ENV;
ws.ws_env = NULL;
break;
case env_none:
break;
case env_sys:
wsflags |= WRDSF_ENV;
if (wsflags & WRDSF_ENV_KV)
ws.ws_env = (const char **) make_env_kv ();
else
ws.ws_env = (const char **) environ;
break;
}
/* Unless command substitution is disabled, route it to wsp_runcmd.  */
if (!(wsflags & WRDSF_NOCMD))
ws.ws_command = wsp_runcmd;
/* Incremental mode always strips the trailing newline, so that an
   empty input line can be recognized below.  */
if (wsflags & WRDSF_INCREMENTAL)
trimnl_option = 1;
next_call = 0;
while ((ptr = fgets (buf, sizeof (buf), stdin)))
{
int rc;
size_t i;
if (trimnl_option)
{
size_t len = strlen (ptr);
if (len && ptr[len-1] == '\n')
ptr[len-1] = 0;
}
/* In incremental mode wordsplit is given a heap copy of the line
   instead of buf.  After the first call an empty line makes the input
   pointer NULL and keeps the previous copy; a non-empty line frees the
   previous copy and starts over with a new one.
   NOTE(review): NULL presumably tells wordsplit to continue with the
   input it was given before -- confirm against the wordsplit docs.  */
if (wsflags & WRDSF_INCREMENTAL)
{
if (next_call)
{
if (*ptr == 0)
ptr = NULL;
else
free (saved_ptr);
}
else
next_call = 1;
if (ptr)
{
ptr = saved_ptr = strdup (ptr);
assert (ptr != NULL);
}
}
rc = wordsplit (ptr, &ws, wsflags);
if (rc)
{
/* Print the error here only if WRDSF_SHOWERR is off; with the flag on
   wordsplit is presumably expected to have reported it already.  */
if (!(wsflags & WRDSF_SHOWERR))
wordsplit_perror (&ws);
continue;
}
/* Fill the slots reserved by dooffs with the words from the command
   line.  Done once: offarg is reset afterwards.  */
if (offarg)
{
for (i = 0; i < ws.ws_offs; i++)
ws.ws_wordv[i] = argv[offarg + i];
offarg = 0;
}
if (appendc)
{
rc = wordsplit_append (&ws, appendc, appendv);
if (rc)
{
if (!(wsflags & WRDSF_SHOWERR))
wordsplit_perror (&ws);
continue;
}
}
/* From now on call wordsplit with WRDSF_REUSE, and carry over the
   WRDSF_ENV bit as recorded in ws.ws_flags.  */
wsflags |= WRDSF_REUSE | (ws.ws_flags & WRDSF_ENV);
printf ("NF: %lu", (unsigned long) ws.ws_wordc);
if (wsflags & WRDSF_DOOFFS)
printf (" (%lu)", (unsigned long) ws.ws_offs);
putchar ('\n');
/* Reserved (dooffs) slots are printed with the index in parentheses.  */
for (i = 0; i < ws.ws_offs; i++)
{
printf ("(%lu): ", (unsigned long) i);
print_qword (ws.ws_wordv[i], plaintext_option);
putchar ('\n');
}
/* The words proper follow, numbered on from ws_offs.  */
for (; i < ws.ws_offs + ws.ws_wordc; i++)
{
printf ("%lu: ", (unsigned long) i);
print_qword (ws.ws_wordv[i], plaintext_option);
putchar ('\n');
}
printf ("TOTAL: %lu\n", (unsigned long) ws.ws_wordi);
}
return 0;
}